From 5b801ba9f9511265e9e1d6cd7c28db9098fe719c Mon Sep 17 00:00:00 2001 From: Sarvesh Rangnekar Date: Fri, 5 Aug 2022 19:24:18 +0000 Subject: [PATCH] Introduce MultiCIDRRangeAllocator MultiCIDRRangeAllocator is a new Range Allocator which makes using multiple ClusterCIDRs possible. It consists of two controllers, one for reconciling the ClusterCIDR API objects and the other for allocating Pod CIDRs to the nodes. The allocation is based on the rules defined in https://github.com/kubernetes/enhancements/tree/master/keps/sig-network/2593-multiple-cluster-cidrs --- .../.import-restrictions | 1 + .../nodeipamcontroller.go | 9 + cmd/kube-controller-manager/app/core.go | 8 + .../nodeipam/ipam/cidr_allocator.go | 43 +- .../ipam/multi_cidr_range_allocator.go | 1205 +++++++++++ .../ipam/multi_cidr_range_allocator_test.go | 1868 +++++++++++++++++ .../nodeipam/ipam/range_allocator.go | 18 +- .../nodeipam/ipam/range_allocator_test.go | 54 +- pkg/controller/nodeipam/ipam/test/utils.go | 33 + .../nodeipam/node_ipam_controller.go | 15 +- .../nodeipam/node_ipam_controller_test.go | 6 +- pkg/features/kube_features.go | 9 + test/integration/ipamperf/ipam_test.go | 6 +- 13 files changed, 3198 insertions(+), 77 deletions(-) create mode 100644 pkg/controller/nodeipam/ipam/multi_cidr_range_allocator.go create mode 100644 pkg/controller/nodeipam/ipam/multi_cidr_range_allocator_test.go diff --git a/cmd/cloud-controller-manager/.import-restrictions b/cmd/cloud-controller-manager/.import-restrictions index 151e8744804..695312b7312 100644 --- a/cmd/cloud-controller-manager/.import-restrictions +++ b/cmd/cloud-controller-manager/.import-restrictions @@ -39,4 +39,5 @@ rules: - k8s.io/kubernetes/pkg/util/taints - k8s.io/kubernetes/pkg/proxy/util - k8s.io/kubernetes/pkg/proxy/util/testing + - k8s.io/kubernetes/pkg/util/slice - k8s.io/kubernetes/pkg/util/sysctl \ No newline at end of file diff --git a/cmd/cloud-controller-manager/nodeipamcontroller.go b/cmd/cloud-controller-manager/nodeipamcontroller.go index b03a9cd4a09..bcd6211342f 100644 --- a/cmd/cloud-controller-manager/nodeipamcontroller.go +++ b/cmd/cloud-controller-manager/nodeipamcontroller.go @@ -26,6 +26,8 @@ import ( "net" "strings" + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/client-go/informers/networking/v1alpha1" cloudprovider "k8s.io/cloud-provider" "k8s.io/cloud-provider/app" cloudcontrollerconfig "k8s.io/cloud-provider/app/config" @@ -36,6 +38,7 @@ import ( nodeipamcontroller "k8s.io/kubernetes/pkg/controller/nodeipam" nodeipamconfig "k8s.io/kubernetes/pkg/controller/nodeipam/config" "k8s.io/kubernetes/pkg/controller/nodeipam/ipam" + "k8s.io/kubernetes/pkg/features" netutils "k8s.io/utils/net" ) @@ -120,8 +123,14 @@ func startNodeIpamController(initContext app.ControllerInitContext, ccmConfig *c return nil, false, err } + var clusterCIDRInformer v1alpha1.ClusterCIDRInformer + if utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) { + clusterCIDRInformer = ctx.InformerFactory.Networking().V1alpha1().ClusterCIDRs() + } + nodeIpamController, err := nodeipamcontroller.NewNodeIpamController( ctx.InformerFactory.Core().V1().Nodes(), + clusterCIDRInformer, cloud, ctx.ClientBuilder.ClientOrDie(initContext.ClientName), clusterCIDRs, diff --git a/cmd/kube-controller-manager/app/core.go b/cmd/kube-controller-manager/app/core.go index 991b29a4347..336adf82c4a 100644 --- a/cmd/kube-controller-manager/app/core.go +++ b/cmd/kube-controller-manager/app/core.go @@ -27,7 +27,9 @@ import ( "strings" "time" + 
"k8s.io/client-go/informers/networking/v1alpha1" "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/features" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime/schema" @@ -153,8 +155,14 @@ func startNodeIpamController(ctx context.Context, controllerContext ControllerCo return nil, false, err } + var clusterCIDRInformer v1alpha1.ClusterCIDRInformer + if utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) { + clusterCIDRInformer = controllerContext.InformerFactory.Networking().V1alpha1().ClusterCIDRs() + } + nodeIpamController, err := nodeipamcontroller.NewNodeIpamController( controllerContext.InformerFactory.Core().V1().Nodes(), + clusterCIDRInformer, controllerContext.Cloud, controllerContext.ClientBuilder.ClientOrDie("node-controller"), clusterCIDRs, diff --git a/pkg/controller/nodeipam/ipam/cidr_allocator.go b/pkg/controller/nodeipam/ipam/cidr_allocator.go index 543a7797f13..4ca058eefcf 100644 --- a/pkg/controller/nodeipam/ipam/cidr_allocator.go +++ b/pkg/controller/nodeipam/ipam/cidr_allocator.go @@ -22,16 +22,18 @@ import ( "net" "time" - "k8s.io/klog/v2" - "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/wait" + utilfeature "k8s.io/apiserver/pkg/util/feature" informers "k8s.io/client-go/informers/core/v1" + networkinginformers "k8s.io/client-go/informers/networking/v1alpha1" clientset "k8s.io/client-go/kubernetes" cloudprovider "k8s.io/cloud-provider" + "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/features" ) // CIDRAllocatorType is the type of the allocator to use. @@ -41,6 +43,9 @@ const ( // RangeAllocatorType is the allocator that uses an internal CIDR // range allocator to do node CIDR range allocations. RangeAllocatorType CIDRAllocatorType = "RangeAllocator" + // MultiCIDRRangeAllocatorType is the allocator that uses an internal CIDR + // range allocator to do node CIDR range allocations. + MultiCIDRRangeAllocatorType CIDRAllocatorType = "MultiCIDRRangeAllocator" // CloudAllocatorType is the allocator that uses cloud platform // support to do node CIDR range allocations. CloudAllocatorType CIDRAllocatorType = "CloudAllocator" @@ -87,7 +92,7 @@ type CIDRAllocator interface { // CIDR if it doesn't currently have one or mark the CIDR as used if // the node already have one. AllocateOrOccupyCIDR(node *v1.Node) error - // ReleaseCIDR releases the CIDR of the removed node + // ReleaseCIDR releases the CIDR of the removed node. ReleaseCIDR(node *v1.Node) error // Run starts all the working logic of the allocator. Run(stopCh <-chan struct{}) @@ -96,18 +101,25 @@ type CIDRAllocator interface { // CIDRAllocatorParams is parameters that's required for creating new // cidr range allocator. type CIDRAllocatorParams struct { - // ClusterCIDRs is list of cluster cidrs + // ClusterCIDRs is list of cluster cidrs. ClusterCIDRs []*net.IPNet - // ServiceCIDR is primary service cidr for cluster + // ServiceCIDR is primary service cidr for cluster. ServiceCIDR *net.IPNet - // SecondaryServiceCIDR is secondary service cidr for cluster + // SecondaryServiceCIDR is secondary service cidr for cluster. SecondaryServiceCIDR *net.IPNet - // NodeCIDRMaskSizes is list of node cidr mask sizes + // NodeCIDRMaskSizes is list of node cidr mask sizes. NodeCIDRMaskSizes []int } +// CIDRs are reserved, then node resource is patched with them. +// nodeReservedCIDRs holds the reservation info for a node. 
+type nodeReservedCIDRs struct { + allocatedCIDRs []*net.IPNet + nodeName string +} + // New creates a new CIDR range allocator. -func New(kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer, allocatorType CIDRAllocatorType, allocatorParams CIDRAllocatorParams) (CIDRAllocator, error) { +func New(kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer, clusterCIDRInformer networkinginformers.ClusterCIDRInformer, allocatorType CIDRAllocatorType, allocatorParams CIDRAllocatorParams) (CIDRAllocator, error) { nodeList, err := listNodes(kubeClient) if err != nil { return nil, err @@ -116,6 +128,12 @@ func New(kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInfo switch allocatorType { case RangeAllocatorType: return NewCIDRRangeAllocator(kubeClient, nodeInformer, allocatorParams, nodeList) + case MultiCIDRRangeAllocatorType: + if !utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) { + return nil, fmt.Errorf("invalid CIDR allocator type: %v, feature gate %v must be enabled", allocatorType, features.MultiCIDRRangeAllocator) + } + return NewMultiCIDRRangeAllocator(kubeClient, nodeInformer, clusterCIDRInformer, allocatorParams, nodeList, nil) + case CloudAllocatorType: return NewCloudCIDRAllocator(kubeClient, cloud, nodeInformer) default: @@ -144,3 +162,12 @@ func listNodes(kubeClient clientset.Interface) (*v1.NodeList, error) { } return nodeList, nil } + +// ipnetToStringList converts a slice of net.IPNet into a list of CIDR in string format +func ipnetToStringList(inCIDRs []*net.IPNet) []string { + outCIDRs := make([]string, len(inCIDRs)) + for idx, inCIDR := range inCIDRs { + outCIDRs[idx] = inCIDR.String() + } + return outCIDRs +} diff --git a/pkg/controller/nodeipam/ipam/multi_cidr_range_allocator.go b/pkg/controller/nodeipam/ipam/multi_cidr_range_allocator.go new file mode 100644 index 00000000000..5fb96887df2 --- /dev/null +++ b/pkg/controller/nodeipam/ipam/multi_cidr_range_allocator.go @@ -0,0 +1,1205 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package ipam + +import ( + "container/heap" + "context" + "errors" + "fmt" + "math" + "math/rand" + "net" + "sync" + "time" + + "k8s.io/api/core/v1" + networkingv1alpha1 "k8s.io/api/networking/v1alpha1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" + informers "k8s.io/client-go/informers/core/v1" + networkinginformers "k8s.io/client-go/informers/networking/v1alpha1" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" + v1core "k8s.io/client-go/kubernetes/typed/core/v1" + corelisters "k8s.io/client-go/listers/core/v1" + networkinglisters "k8s.io/client-go/listers/networking/v1alpha1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/tools/record" + "k8s.io/client-go/util/workqueue" + "k8s.io/component-base/metrics/prometheus/ratelimiter" + nodeutil "k8s.io/component-helpers/node/util" + "k8s.io/klog/v2" + v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" + cidrset "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/multicidrset" + controllerutil "k8s.io/kubernetes/pkg/controller/util/node" + "k8s.io/kubernetes/pkg/util/slice" + netutil "k8s.io/utils/net" +) + +const ( + defaultClusterCIDRKey = "kubernetes.io/clusterCIDR" + defaultClusterCIDRValue = "default" + defaultClusterCIDRName = "default-cluster-cidr" + defaultClusterCIDRAPIVersion = "networking.k8s.io/v1alpha1" + clusterCIDRFinalizer = "networking.k8s.io/cluster-cidr-finalizer" + ipv4MaxCIDRMask = 32 + ipv6MaxCIDRMask = 128 + minPerNodeHostBits = 4 +) + +// CIDRs are reserved, then node resource is patched with them. +// multiCIDRNodeReservedCIDRs holds the reservation info for a node. +type multiCIDRNodeReservedCIDRs struct { + nodeReservedCIDRs + clusterCIDR *cidrset.ClusterCIDR +} + +// multiCIDRNodeProcessingInfo tracks information related to current nodes in processing +type multiCIDRNodeProcessingInfo struct { + retries int +} + +type multiCIDRRangeAllocator struct { + client clientset.Interface + // nodeLister is able to list/get nodes and is populated by the shared informer passed to controller. + nodeLister corelisters.NodeLister + // nodesSynced returns true if the node shared informer has been synced at least once. + nodesSynced cache.InformerSynced + // clusterCIDRLister is able to list/get clustercidrs and is populated by the shared informer passed to controller. + clusterCIDRLister networkinglisters.ClusterCIDRLister + // clusterCIDRSynced returns true if the clustercidr shared informer has been synced at least once. + clusterCIDRSynced cache.InformerSynced + // Channel that is used to pass updating Nodes and their reserved CIDRs to the background. + // This increases a throughput of CIDR assignment by not blocking on long operations. + nodeCIDRUpdateChannel chan multiCIDRNodeReservedCIDRs + recorder record.EventRecorder + // queue is where incoming work is placed to de-dup and to allow "easy" + // rate limited requeues on errors + queue workqueue.RateLimitingInterface + + // lock guards nodesInProcessing and cidrMap to avoid races in CIDR allocation. + lock *sync.Mutex + // nodesInProcessing is a set of nodes that are currently being processed. + nodesInProcessing map[string]*multiCIDRNodeProcessingInfo + // cidrMap maps ClusterCIDR labels to internal ClusterCIDR objects. 
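+ // The map key is the marshalled node selector of a ClusterCIDR (or the default selector when none is set), so ClusterCIDRs sharing a selector are grouped in a single slice.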
+ cidrMap map[string][]*cidrset.ClusterCIDR +} + +// NewMultiCIDRRangeAllocator returns a CIDRAllocator to allocate CIDRs for node (one for each ip family). +// Caller must always pass in a list of existing nodes to the new allocator. +// NodeList is only nil in testing. +func NewMultiCIDRRangeAllocator( + client clientset.Interface, + nodeInformer informers.NodeInformer, + clusterCIDRInformer networkinginformers.ClusterCIDRInformer, + allocatorParams CIDRAllocatorParams, + nodeList *v1.NodeList, + testCIDRMap map[string][]*cidrset.ClusterCIDR, +) (CIDRAllocator, error) { + if client == nil { + klog.Fatalf("client is nil") + } + + eventBroadcaster := record.NewBroadcaster() + eventSource := v1.EventSource{ + Component: "multiCIDRRangeAllocator", + } + recorder := eventBroadcaster.NewRecorder(scheme.Scheme, eventSource) + eventBroadcaster.StartStructuredLogging(0) + klog.V(0).Infof("Started sending events to API Server. (EventSource = %v)", eventSource) + + eventBroadcaster.StartRecordingToSink( + &v1core.EventSinkImpl{ + Interface: client.CoreV1().Events(""), + }) + + if client.CoreV1().RESTClient().GetRateLimiter() != nil { + ratelimiter.RegisterMetricAndTrackRateLimiterUsage("multi_cidr_range_allocator", client.CoreV1().RESTClient().GetRateLimiter()) + } + + ra := &multiCIDRRangeAllocator{ + client: client, + nodeLister: nodeInformer.Lister(), + nodesSynced: nodeInformer.Informer().HasSynced, + clusterCIDRLister: clusterCIDRInformer.Lister(), + clusterCIDRSynced: clusterCIDRInformer.Informer().HasSynced, + nodeCIDRUpdateChannel: make(chan multiCIDRNodeReservedCIDRs, cidrUpdateQueueSize), + recorder: recorder, + queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "multi_cidr_range_allocator"), + lock: &sync.Mutex{}, + nodesInProcessing: map[string]*multiCIDRNodeProcessingInfo{}, + cidrMap: make(map[string][]*cidrset.ClusterCIDR, 0), + } + + // testCIDRMap is only set for testing purposes. + if len(testCIDRMap) > 0 { + ra.cidrMap = testCIDRMap + klog.Warningf("testCIDRMap should only be set for testing purposes, if this is seen in production logs, it might be a misconfiguration or a bug.") + } + + ccList, err := listClusterCIDRs(client) + if err != nil { + return nil, err + } + + if ccList == nil { + ccList = &networkingv1alpha1.ClusterCIDRList{} + } + createDefaultClusterCIDR(ccList, allocatorParams) + + // Regenerate the cidrMaps from the existing ClusterCIDRs. + for _, clusterCIDR := range ccList.Items { + klog.Infof("Regenerating existing ClusterCIDR: %v", clusterCIDR) + // Create an event for invalid ClusterCIDRs, do not crash on failures. + if err := ra.reconcileBootstrap(&clusterCIDR); err != nil { + klog.Errorf("Error while regenerating existing ClusterCIDR: %v", err) + ra.recorder.Event(&clusterCIDR, "Warning", "InvalidClusterCIDR encountered while regenerating ClusterCIDR during bootstrap.", err.Error()) + } + } + + clusterCIDRInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: createClusterCIDRHandler(ra.reconcileCreate), + DeleteFunc: createClusterCIDRHandler(ra.reconcileDelete), + }) + + if allocatorParams.ServiceCIDR != nil { + ra.filterOutServiceRange(allocatorParams.ServiceCIDR) + } else { + klog.V(0).Info("No Service CIDR provided. Skipping filtering out service addresses.") + } + + if allocatorParams.SecondaryServiceCIDR != nil { + ra.filterOutServiceRange(allocatorParams.SecondaryServiceCIDR) + } else { + klog.V(0).Info("No Secondary Service CIDR provided. 
Skipping filtering out secondary service addresses.") + } + + if nodeList != nil { + for _, node := range nodeList.Items { + if len(node.Spec.PodCIDRs) == 0 { + klog.V(4).Infof("Node %v has no CIDR, ignoring", node.Name) + continue + } + klog.V(0).Infof("Node %v has CIDR %s, occupying it in CIDR map", node.Name, node.Spec.PodCIDRs) + if err := ra.occupyCIDRs(&node); err != nil { + // This will happen if: + // 1. We find garbage in the podCIDRs field. Retrying is useless. + // 2. CIDR out of range: This means ClusterCIDR is not yet created + // This error will keep crashing controller-manager until the + // appropriate ClusterCIDR has been created + return nil, err + } + } + } + + nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: controllerutil.CreateAddNodeHandler(ra.AllocateOrOccupyCIDR), + UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error { + // If the PodCIDRs list is not empty we either: + // - already processed a Node that already had CIDRs after NC restarted + // (cidr is marked as used), + // - already processed a Node successfully and allocated CIDRs for it + // (cidr is marked as used), + // - already processed a Node but we saw a "timeout" response and + // request eventually got through in this case we haven't released + // the allocated CIDRs (cidr is still marked as used). + // There's a possible error here: + // - NC sees a new Node and assigns CIDRs X,Y.. to it, + // - Update Node call fails with a timeout, + // - Node is updated by some other component, NC sees an update and + // assigns CIDRs A,B.. to the Node, + // - Both CIDR X,Y.. and CIDR A,B.. are marked as used in the local cache, + // even though Node sees only CIDR A,B.. + // The problem here is that in in-memory cache we see CIDR X,Y.. as marked, + // which prevents it from being assigned to any new node. The cluster + // state is correct. + // Restart of NC fixes the issue. + if len(newNode.Spec.PodCIDRs) == 0 { + return ra.AllocateOrOccupyCIDR(newNode) + } + return nil + }), + DeleteFunc: controllerutil.CreateDeleteNodeHandler(ra.ReleaseCIDR), + }) + + return ra, nil +} + +func (r *multiCIDRRangeAllocator) Run(stopCh <-chan struct{}) { + defer utilruntime.HandleCrash() + + klog.Infof("Starting Multi CIDR Range allocator") + defer klog.Infof("Shutting down Multi CIDR Range allocator") + + if !cache.WaitForNamedCacheSync("multi_cidr_range_allocator", stopCh, r.nodesSynced, r.clusterCIDRSynced) { + return + } + + // raWaitGroup is used to wait for the RangeAllocator to finish the goroutines. + var raWaitGroup sync.WaitGroup + + for i := 0; i < cidrUpdateWorkers; i++ { + raWaitGroup.Add(1) + go func() { + defer raWaitGroup.Done() + r.worker(stopCh) + }() + } + + raWaitGroup.Wait() + + <-stopCh +} + +func (r *multiCIDRRangeAllocator) worker(stopChan <-chan struct{}) { + for { + select { + case workItem, ok := <-r.nodeCIDRUpdateChannel: + if !ok { + klog.Error("Channel nodeCIDRUpdateChannel was unexpectedly closed") + return + } + r.lock.Lock() + if err := r.updateCIDRsAllocation(workItem); err == nil { + klog.V(3).Infof("Updated CIDR for %q", workItem.nodeName) + } else { + klog.Errorf("Error updating CIDR for %q: %v", workItem.nodeName, err) + if canRetry, timeout := r.retryParams(workItem.nodeName); canRetry { + klog.V(2).Infof("Retrying update for %q after %v", workItem.nodeName, timeout) + time.AfterFunc(timeout, func() { + // Requeue the failed node for update again. 
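+ // The retry delay comes from multiCIDRNodeUpdateRetryTimeout (exponential backoff with jitter, capped at maxUpdateRetryTimeout); retryParams stops retrying once updateMaxRetries is exceeded.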
+ r.nodeCIDRUpdateChannel <- workItem + }) + continue + } + klog.Errorf("Exceeded retry count for %q, dropping from queue", workItem.nodeName) + } + r.removeNodeFromProcessing(workItem.nodeName) + r.lock.Unlock() + case <-stopChan: + klog.Infof("MultiCIDRRangeAllocator worker is stopping.") + return + } + } +} + +// createClusterCIDRHandler creates clusterCIDR handler. +func createClusterCIDRHandler(f func(ccc *networkingv1alpha1.ClusterCIDR) error) func(obj interface{}) { + return func(originalObj interface{}) { + ccc := originalObj.(*networkingv1alpha1.ClusterCIDR) + if err := f(ccc); err != nil { + utilruntime.HandleError(fmt.Errorf("error while processing ClusterCIDR Add/Delete: %w", err)) + } + } +} + +// needToAddFinalizer checks if a finalizer should be added to the object. +func needToAddFinalizer(obj metav1.Object, finalizer string) bool { + return obj.GetDeletionTimestamp() == nil && !slice.ContainsString(obj.GetFinalizers(), + finalizer, nil) +} + +func (r *multiCIDRRangeAllocator) syncClusterCIDR(key string) error { + startTime := time.Now() + defer func() { + klog.V(4).Infof("Finished syncing clusterCIDR request %q (%v)", key, time.Since(startTime)) + }() + + clusterCIDR, err := r.clusterCIDRLister.Get(key) + if apierrors.IsNotFound(err) { + klog.V(3).Infof("clusterCIDR has been deleted: %v", key) + return nil + } + + if err != nil { + return err + } + + // Check the DeletionTimestamp to determine if object is under deletion. + if !clusterCIDR.DeletionTimestamp.IsZero() { + return r.reconcileDelete(clusterCIDR) + } + return r.reconcileCreate(clusterCIDR) +} + +func (r *multiCIDRRangeAllocator) insertNodeToProcessing(nodeName string) bool { + if _, found := r.nodesInProcessing[nodeName]; found { + return false + } + r.nodesInProcessing[nodeName] = &multiCIDRNodeProcessingInfo{} + return true +} + +func (r *multiCIDRRangeAllocator) removeNodeFromProcessing(nodeName string) { + klog.Infof("Removing node %q from processing", nodeName) + delete(r.nodesInProcessing, nodeName) +} + +func (r *multiCIDRRangeAllocator) retryParams(nodeName string) (bool, time.Duration) { + r.lock.Lock() + defer r.lock.Unlock() + + entry, ok := r.nodesInProcessing[nodeName] + if !ok { + klog.Errorf("Cannot get retryParams for %q as entry does not exist", nodeName) + return false, 0 + } + + count := entry.retries + 1 + if count > updateMaxRetries { + return false, 0 + } + r.nodesInProcessing[nodeName].retries = count + + return true, multiCIDRNodeUpdateRetryTimeout(count) +} + +func multiCIDRNodeUpdateRetryTimeout(count int) time.Duration { + timeout := updateRetryTimeout + for i := 0; i < count && timeout < maxUpdateRetryTimeout; i++ { + timeout *= 2 + } + if timeout > maxUpdateRetryTimeout { + timeout = maxUpdateRetryTimeout + } + return time.Duration(timeout.Nanoseconds()/2 + rand.Int63n(timeout.Nanoseconds())) +} + +// occupyCIDRs marks node.PodCIDRs[...] as used in allocator's tracked cidrSet. 
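+// It walks the matching ClusterCIDRs in priority order and associates the node with the first ClusterCIDR that can occupy every CIDR in node.Spec.PodCIDRs; an error is returned if none can.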
+func (r *multiCIDRRangeAllocator) occupyCIDRs(node *v1.Node) error { + + err := func(node *v1.Node) error { + + if len(node.Spec.PodCIDRs) == 0 { + return nil + } + + clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node) + if err != nil { + return err + } + + for _, clusterCIDR := range clusterCIDRList { + occupiedCount := 0 + + for _, cidr := range node.Spec.PodCIDRs { + _, podCIDR, err := netutil.ParseCIDRSloppy(cidr) + if err != nil { + return fmt.Errorf("failed to parse CIDR %s on Node %v: %w", cidr, node.Name, err) + } + + klog.Infof("occupy CIDR %s for node: %s", cidr, node.Name) + + if err := r.Occupy(clusterCIDR, podCIDR); err != nil { + klog.V(3).Infof("Could not occupy cidr: %v, trying next range: %w", node.Spec.PodCIDRs, err) + break + } + + occupiedCount++ + } + + // Mark CIDRs as occupied only if the CCC is able to occupy all the node CIDRs. + if occupiedCount == len(node.Spec.PodCIDRs) { + clusterCIDR.AssociatedNodes[node.Name] = true + return nil + } + } + + return fmt.Errorf("could not occupy cidrs: %v, No matching ClusterCIDRs found", node.Spec.PodCIDRs) + }(node) + + r.removeNodeFromProcessing(node.Name) + return err +} + +// associatedCIDRSet returns the CIDRSet, based on the ip family of the CIDR. +func (r *multiCIDRRangeAllocator) associatedCIDRSet(clusterCIDR *cidrset.ClusterCIDR, cidr *net.IPNet) (*cidrset.MultiCIDRSet, error) { + switch { + case netutil.IsIPv4CIDR(cidr): + return clusterCIDR.IPv4CIDRSet, nil + case netutil.IsIPv6CIDR(cidr): + return clusterCIDR.IPv6CIDRSet, nil + default: + return nil, fmt.Errorf("invalid cidr: %v", cidr) + } +} + +// Occupy marks the CIDR as occupied in the allocatedCIDRMap of the cidrSet. +func (r *multiCIDRRangeAllocator) Occupy(clusterCIDR *cidrset.ClusterCIDR, cidr *net.IPNet) error { + currCIDRSet, err := r.associatedCIDRSet(clusterCIDR, cidr) + if err != nil { + return err + } + + if err := currCIDRSet.Occupy(cidr); err != nil { + return fmt.Errorf("unable to occupy cidr %v in cidrSet", cidr) + } + + return nil +} + +// Release marks the CIDR as free in the cidrSet used bitmap, +// Also removes the CIDR from the allocatedCIDRSet. +func (r *multiCIDRRangeAllocator) Release(clusterCIDR *cidrset.ClusterCIDR, cidr *net.IPNet) error { + currCIDRSet, err := r.associatedCIDRSet(clusterCIDR, cidr) + if err != nil { + return err + } + + if err := currCIDRSet.Release(cidr); err != nil { + klog.Infof("Unable to release cidr %v in cidrSet", cidr) + return err + } + + return nil +} + +// AllocateOrOccupyCIDR allocates a CIDR to the node if the node doesn't have a +// CIDR already allocated, occupies the CIDR and marks as used if the node +// already has a PodCIDR assigned. +// WARNING: If you're adding any return calls or defer any more work from this +// function you have to make sure to update nodesInProcessing properly with the +// disposition of the node when the work is done. 
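+// For a node without PodCIDRs, prioritizedCIDRs reserves one CIDR per configured IP family from the first matching ClusterCIDR with free space, and updateCIDRsAllocation patches the node with them.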
+func (r *multiCIDRRangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error { + r.lock.Lock() + defer r.lock.Unlock() + + if node == nil { + return nil + } + + if !r.insertNodeToProcessing(node.Name) { + klog.Infof("Node %v is already in a process of CIDR assignment.", node.Name) + return nil + } + + if len(node.Spec.PodCIDRs) > 0 { + return r.occupyCIDRs(node) + } + + cidrs, clusterCIDR, err := r.prioritizedCIDRs(node) + if err != nil { + r.removeNodeFromProcessing(node.Name) + controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRNotAvailable") + return fmt.Errorf("failed to get cidrs for node %s", node.Name) + } + + if len(cidrs) == 0 { + r.removeNodeFromProcessing(node.Name) + controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRNotAvailable") + return fmt.Errorf("no cidrSets with matching labels found for node %s", node.Name) + } + + // allocate and queue the assignment. + allocated := multiCIDRNodeReservedCIDRs{ + nodeReservedCIDRs: nodeReservedCIDRs{ + nodeName: node.Name, + allocatedCIDRs: cidrs, + }, + clusterCIDR: clusterCIDR, + } + + return r.updateCIDRsAllocation(allocated) +} + +// ReleaseCIDR marks node.podCIDRs[...] as unused in our tracked cidrSets. +func (r *multiCIDRRangeAllocator) ReleaseCIDR(node *v1.Node) error { + r.lock.Lock() + defer r.lock.Unlock() + + if node == nil || len(node.Spec.PodCIDRs) == 0 { + return nil + } + + clusterCIDR, err := r.allocatedClusterCIDR(node) + if err != nil { + return err + } + + for _, cidr := range node.Spec.PodCIDRs { + _, podCIDR, err := netutil.ParseCIDRSloppy(cidr) + if err != nil { + return fmt.Errorf("failed to parse CIDR %q on Node %q: %w", cidr, node.Name, err) + } + + klog.Infof("release CIDR %s for node: %s", cidr, node.Name) + if err := r.Release(clusterCIDR, podCIDR); err != nil { + return fmt.Errorf("failed to release cidr %q from clusterCIDR %q for node %q: %w", cidr, clusterCIDR.Name, node.Name, err) + } + } + + // Remove the node from the ClusterCIDR AssociatedNodes. + delete(clusterCIDR.AssociatedNodes, node.Name) + + return nil +} + +// Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used across all cidrs +// so that they won't be assignable. +func (r *multiCIDRRangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) { + // Checks if service CIDR has a nonempty intersection with cluster + // CIDR. It is the case if either clusterCIDR contains serviceCIDR with + // clusterCIDR's Mask applied (this means that clusterCIDR contains + // serviceCIDR) or vice versa (which means that serviceCIDR contains + // clusterCIDR). + for _, clusterCIDRList := range r.cidrMap { + for _, clusterCIDR := range clusterCIDRList { + if err := r.occupyServiceCIDR(clusterCIDR, serviceCIDR); err != nil { + klog.Errorf("unable to occupy service CIDR: %w", err) + } + } + } +} + +func (r *multiCIDRRangeAllocator) occupyServiceCIDR(clusterCIDR *cidrset.ClusterCIDR, serviceCIDR *net.IPNet) error { + + cidrSet, err := r.associatedCIDRSet(clusterCIDR, serviceCIDR) + if err != nil { + return err + } + + cidr := cidrSet.ClusterCIDR + + // No need to occupy as Service CIDR doesn't intersect with the current ClusterCIDR. 
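+ // Overlap is checked in both directions: the ClusterCIDR containing the service CIDR, or the service CIDR containing the ClusterCIDR.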
+ if !cidr.Contains(serviceCIDR.IP.Mask(cidr.Mask)) && !serviceCIDR.Contains(cidr.IP.Mask(serviceCIDR.Mask)) { + return nil + } + + if err := r.Occupy(clusterCIDR, serviceCIDR); err != nil { + return fmt.Errorf("error filtering out service cidr %v from cluster cidr %v: %w", cidr, serviceCIDR, err) + } + + return nil +} + +// updateCIDRsAllocation assigns CIDR to Node and sends an update to the API server. +func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(data multiCIDRNodeReservedCIDRs) error { + err := func(data multiCIDRNodeReservedCIDRs) error { + cidrsString := ipnetToStringList(data.allocatedCIDRs) + node, err := r.nodeLister.Get(data.nodeName) + if err != nil { + klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDRs: %v", data.nodeName, err) + return err + } + + // if cidr list matches the proposed, + // then we possibly updated this node + // and just failed to ack the success. + if len(node.Spec.PodCIDRs) == len(data.allocatedCIDRs) { + match := true + for idx, cidr := range cidrsString { + if node.Spec.PodCIDRs[idx] != cidr { + match = false + break + } + } + if match { + klog.V(4).Infof("Node %q already has allocated CIDR %q. It matches the proposed one.", node.Name, data.allocatedCIDRs) + return nil + } + } + + // node has cidrs allocated, release the reserved. + if len(node.Spec.PodCIDRs) != 0 { + klog.Errorf("Node %q already has a CIDR allocated %q. Releasing the new one.", node.Name, node.Spec.PodCIDRs) + for _, cidr := range data.allocatedCIDRs { + if err := r.Release(data.clusterCIDR, cidr); err != nil { + return fmt.Errorf("failed to release cidr %s from clusterCIDR %s for node: %s: %w", cidr, data.clusterCIDR.Name, node.Name, err) + } + } + return nil + } + + // If we reached here, it means that the node has no CIDR currently assigned. So we set it. + for i := 0; i < cidrUpdateRetries; i++ { + if err = nodeutil.PatchNodeCIDRs(r.client, types.NodeName(node.Name), cidrsString); err == nil { + data.clusterCIDR.AssociatedNodes[node.Name] = true + klog.Infof("Set node %q PodCIDR to %q", node.Name, cidrsString) + return nil + } + } + // failed release back to the pool. + klog.Errorf("Failed to update node %q PodCIDR to %q after %d attempts: %v", node.Name, cidrsString, cidrUpdateRetries, err) + controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed") + // We accept the fact that we may leak CIDRs here. This is safer than releasing + // them in case when we don't know if request went through. + // NodeController restart will return all falsely allocated CIDRs to the pool. + if !apierrors.IsServerTimeout(err) { + klog.Errorf("CIDR assignment for node %q failed: %v. Releasing allocated CIDR", node.Name, err) + for _, cidr := range data.allocatedCIDRs { + if err := r.Release(data.clusterCIDR, cidr); err != nil { + return fmt.Errorf("failed to release cidr %q from clusterCIDR %q for node: %q: %w", cidr, data.clusterCIDR.Name, node.Name, err) + } + } + } + return err + }(data) + + r.removeNodeFromProcessing(data.nodeName) + return err +} + +// defaultNodeSelector generates a label with defaultClusterCIDRKey as the key and +// defaultClusterCIDRValue as the value, it is an internal nodeSelector matching all +// nodes. Only used if no ClusterCIDR selects the node. 
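+// The marshalled selector serves as the cidrMap key both for ClusterCIDRs that do not set Spec.NodeSelector and for the catch-all entry appended in orderedMatchingClusterCIDRs.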
+func defaultNodeSelector() ([]byte, error) { + nodeSelector := &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: defaultClusterCIDRKey, + Operator: v1.NodeSelectorOpIn, + Values: []string{defaultClusterCIDRValue}, + }, + }, + }, + }, + } + + marshalledSelector, err := nodeSelector.Marshal() + if err != nil { + return nil, err + } + + return marshalledSelector, nil +} + +// prioritizedCIDRs returns a list of CIDRs to be allocated to the node. +// Returns 1 CIDR if single stack. +// Returns 2 CIDRs , 1 from each ip family if dual stack. +func (r *multiCIDRRangeAllocator) prioritizedCIDRs(node *v1.Node) ([]*net.IPNet, *cidrset.ClusterCIDR, error) { + clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node) + if err != nil { + return nil, nil, fmt.Errorf("unable to get a clusterCIDR for node %s: %w", node.Name, err) + } + + for _, clusterCIDR := range clusterCIDRList { + cidrs := make([]*net.IPNet, 0) + if clusterCIDR.IPv4CIDRSet != nil { + cidr, err := r.allocateCIDR(clusterCIDR, clusterCIDR.IPv4CIDRSet) + if err != nil { + klog.V(3).Infof("unable to allocate IPv4 CIDR, trying next range: %w", err) + continue + } + cidrs = append(cidrs, cidr) + } + + if clusterCIDR.IPv6CIDRSet != nil { + cidr, err := r.allocateCIDR(clusterCIDR, clusterCIDR.IPv6CIDRSet) + if err != nil { + klog.V(3).Infof("unable to allocate IPv6 CIDR, trying next range: %w", err) + continue + } + cidrs = append(cidrs, cidr) + } + + return cidrs, clusterCIDR, nil + } + return nil, nil, fmt.Errorf("unable to get a clusterCIDR for node %s, no available CIDRs", node.Name) +} + +func (r *multiCIDRRangeAllocator) allocateCIDR(clusterCIDR *cidrset.ClusterCIDR, cidrSet *cidrset.MultiCIDRSet) (*net.IPNet, error) { + + for evaluated := 0; evaluated < cidrSet.MaxCIDRs; evaluated++ { + candidate, lastEvaluated, err := cidrSet.NextCandidate() + if err != nil { + return nil, err + } + + evaluated += lastEvaluated + + if r.cidrInAllocatedList(candidate) { + continue + } + + // Deep Check. + if r.cidrOverlapWithAllocatedList(candidate) { + continue + } + + // Mark the CIDR as occupied in the map. + if err := r.Occupy(clusterCIDR, candidate); err != nil { + return nil, err + } + // Increment the evaluated count metric. + cidrSet.UpdateEvaluatedCount(evaluated) + return candidate, nil + } + return nil, &cidrset.CIDRRangeNoCIDRsRemainingErr{ + CIDR: cidrSet.Label, + } +} + +func (r *multiCIDRRangeAllocator) cidrInAllocatedList(cidr *net.IPNet) bool { + for _, clusterCIDRList := range r.cidrMap { + for _, clusterCIDR := range clusterCIDRList { + cidrSet, _ := r.associatedCIDRSet(clusterCIDR, cidr) + if cidrSet != nil { + if ok := cidrSet.AllocatedCIDRMap[cidr.String()]; ok { + return true + } + } + } + } + return false +} + +func (r *multiCIDRRangeAllocator) cidrOverlapWithAllocatedList(cidr *net.IPNet) bool { + for _, clusterCIDRList := range r.cidrMap { + for _, clusterCIDR := range clusterCIDRList { + cidrSet, _ := r.associatedCIDRSet(clusterCIDR, cidr) + if cidrSet != nil { + for allocated := range cidrSet.AllocatedCIDRMap { + _, allocatedCIDR, _ := netutil.ParseCIDRSloppy(allocated) + if cidr.Contains(allocatedCIDR.IP.Mask(cidr.Mask)) || allocatedCIDR.Contains(cidr.IP.Mask(allocatedCIDR.Mask)) { + return true + } + } + } + } + } + return false +} + +// allocatedClusterCIDR returns the ClusterCIDR from which the node CIDRs were allocated. 
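+// It consults the AssociatedNodes map of each matching ClusterCIDR and returns an error if the node is not associated with any of them.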
+func (r *multiCIDRRangeAllocator) allocatedClusterCIDR(node *v1.Node) (*cidrset.ClusterCIDR, error) { + clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node) + if err != nil { + return nil, fmt.Errorf("unable to get a clusterCIDR for node %s: %w", node.Name, err) + } + + for _, clusterCIDR := range clusterCIDRList { + if ok := clusterCIDR.AssociatedNodes[node.Name]; ok { + return clusterCIDR, nil + } + } + return nil, fmt.Errorf("no clusterCIDR found associated with node: %s", node.Name) +} + +// orderedMatchingClusterCIDRs returns a list of all the ClusterCIDRs matching the node labels. +// The list is ordered with the following priority, which act as tie-breakers. +// P0: ClusterCIDR with higher number of matching labels has the highest priority. +// P1: ClusterCIDR having cidrSet with fewer allocatable Pod CIDRs has higher priority. +// P2: ClusterCIDR with a PerNodeMaskSize having fewer IPs has higher priority. +// P3: ClusterCIDR having label with lower alphanumeric value has higher priority. +// P4: ClusterCIDR with a cidrSet having a smaller IP address value has a higher priority. +func (r *multiCIDRRangeAllocator) orderedMatchingClusterCIDRs(node *v1.Node) ([]*cidrset.ClusterCIDR, error) { + matchingCIDRs := make([]*cidrset.ClusterCIDR, 0) + pq := make(PriorityQueue, 0) + + for label, clusterCIDRList := range r.cidrMap { + labelsMatch, matchCnt, err := r.matchCIDRLabels(node, []byte(label)) + if err != nil { + return nil, err + } + + if !labelsMatch { + continue + } + + for _, clusterCIDR := range clusterCIDRList { + pqItem := &PriorityQueueItem{ + clusterCIDR: clusterCIDR, + labelMatchCount: matchCnt, + selectorString: label, + } + + // Only push the CIDRsets which are not marked for termination. + if !clusterCIDR.Terminating { + heap.Push(&pq, pqItem) + } + } + } + + // Remove the ClusterCIDRs from the PriorityQueue. + // They arrive in descending order of matchCnt, + // if matchCnt is equal it is ordered in ascending order of labels. + for pq.Len() > 0 { + pqItem := heap.Pop(&pq).(*PriorityQueueItem) + matchingCIDRs = append(matchingCIDRs, pqItem.clusterCIDR) + } + + // Append the catch all CIDR config. + defaultSelector, err := defaultNodeSelector() + if err != nil { + return nil, err + } + if clusterCIDRList, ok := r.cidrMap[string(defaultSelector)]; ok { + matchingCIDRs = append(matchingCIDRs, clusterCIDRList...) + } + return matchingCIDRs, nil +} + +// matchCIDRLabels Matches the Node labels to CIDR Configs. +// Returns true only if all the labels match, also returns the count of matching labels. +func (r *multiCIDRRangeAllocator) matchCIDRLabels(node *v1.Node, label []byte) (bool, int, error) { + var labelSet labels.Set + var matchCnt int + + labelsMatch := false + selector := &v1.NodeSelector{} + err := selector.Unmarshal(label) + if err != nil { + klog.Errorf("Unable to unmarshal node selector for label %v: %v", label, err) + return labelsMatch, 0, err + } + + ls, err := v1helper.NodeSelectorAsSelector(selector) + if err != nil { + klog.Errorf("Unable to convert NodeSelector to labels.Selector: %v", err) + return labelsMatch, 0, err + } + reqs, selectable := ls.Requirements() + + labelSet = node.ObjectMeta.Labels + if selectable { + matchCnt = 0 + for _, req := range reqs { + if req.Matches(labelSet) { + matchCnt += 1 + } + } + if matchCnt == len(reqs) { + labelsMatch = true + } + } + return labelsMatch, matchCnt, err +} + +// Methods for handling ClusterCIDRs. + +// createDefaultClusterCIDR creates a default ClusterCIDR if --cluster-cidr has +// been configured. 
It converts the --cluster-cidr and --per-node-mask-size* flags +// to appropriate ClusterCIDR fields. +func createDefaultClusterCIDR(existingConfigList *networkingv1alpha1.ClusterCIDRList, + allocatorParams CIDRAllocatorParams) { + // Create default ClusterCIDR only if --cluster-cidr has been configured + if len(allocatorParams.ClusterCIDRs) == 0 { + return + } + + for _, clusterCIDR := range existingConfigList.Items { + if clusterCIDR.Name == defaultClusterCIDRName { + // Default ClusterCIDR already exists, no further action required. + klog.V(3).Infof("Default ClusterCIDR %s already exists", defaultClusterCIDRName) + return + } + } + + // Create a default ClusterCIDR as it is not already created. + defaultCIDRConfig := &networkingv1alpha1.ClusterCIDR{ + TypeMeta: metav1.TypeMeta{ + APIVersion: defaultClusterCIDRAPIVersion, + Kind: "ClusterCIDR", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: defaultClusterCIDRName, + }, + Spec: networkingv1alpha1.ClusterCIDRSpec{ + PerNodeHostBits: minPerNodeHostBits, + }, + } + + ipv4PerNodeHostBits := int32(math.MinInt32) + ipv6PerNodeHostBits := int32(math.MinInt32) + isDualstack := false + if len(allocatorParams.ClusterCIDRs) == 2 { + isDualstack = true + } + + for i, cidr := range allocatorParams.ClusterCIDRs { + if netutil.IsIPv4CIDR(cidr) { + defaultCIDRConfig.Spec.IPv4 = cidr.String() + ipv4PerNodeHostBits = ipv4MaxCIDRMask - int32(allocatorParams.NodeCIDRMaskSizes[i]) + if !isDualstack && ipv4PerNodeHostBits > minPerNodeHostBits { + defaultCIDRConfig.Spec.PerNodeHostBits = ipv4PerNodeHostBits + } + } else if netutil.IsIPv6CIDR(cidr) { + defaultCIDRConfig.Spec.IPv6 = cidr.String() + ipv6PerNodeHostBits = ipv6MaxCIDRMask - int32(allocatorParams.NodeCIDRMaskSizes[i]) + if !isDualstack && ipv6PerNodeHostBits > minPerNodeHostBits { + defaultCIDRConfig.Spec.PerNodeHostBits = ipv6PerNodeHostBits + } + } + } + + if isDualstack { + // In case of dualstack CIDRs, currently the default values for PerNodeMaskSize are + // 24 for IPv4 (PerNodeHostBits=8) and 64 for IPv6(PerNodeHostBits=64), there is no + // requirement for the PerNodeHostBits to be equal for IPv4 and IPv6, However with + // the introduction of ClusterCIDRs, we enforce the requirement for a single + // PerNodeHostBits field, thus we choose the minimum PerNodeHostBits value, to avoid + // overflow for IPv4 CIDRs. + if ipv4PerNodeHostBits >= minPerNodeHostBits && ipv4PerNodeHostBits <= ipv6PerNodeHostBits { + defaultCIDRConfig.Spec.PerNodeHostBits = ipv4PerNodeHostBits + } else if ipv6PerNodeHostBits >= minPerNodeHostBits && ipv6PerNodeHostBits <= ipv4MaxCIDRMask { + defaultCIDRConfig.Spec.PerNodeHostBits = ipv6PerNodeHostBits + } + } + + existingConfigList.Items = append(existingConfigList.Items, *defaultCIDRConfig) + + return +} + +// reconcileCreate handles create ClusterCIDR events. +func (r *multiCIDRRangeAllocator) reconcileCreate(clusterCIDR *networkingv1alpha1.ClusterCIDR) error { + r.lock.Lock() + defer r.lock.Unlock() + + if needToAddFinalizer(clusterCIDR, clusterCIDRFinalizer) { + klog.V(3).Infof("Creating ClusterCIDR %s", clusterCIDR.Name) + if err := r.createClusterCIDR(clusterCIDR, false); err != nil { + klog.Errorf("Unable to create ClusterCIDR %s : %v", clusterCIDR.Name, err) + return err + } + } + return nil +} + +// reconcileBootstrap handles creation of existing ClusterCIDRs. +// adds a finalizer if not already present. 
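+// A ClusterCIDR whose Spec was modified before the controller started (Generation > 1) is registered in a Terminating state, which excludes it from new CIDR allocations.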
+func (r *multiCIDRRangeAllocator) reconcileBootstrap(clusterCIDR *networkingv1alpha1.ClusterCIDR) error { + r.lock.Lock() + defer r.lock.Unlock() + + terminating := false + // Create the ClusterCIDR only if the Spec has not been modified. + if clusterCIDR.Generation > 1 { + terminating = true + err := fmt.Errorf("CIDRs from ClusterCIDR %s will not be used for allocation as it was modified", clusterCIDR.Name) + klog.Errorf("ClusterCIDR Modified: %v", err) + } + + klog.V(2).Infof("Creating ClusterCIDR %s during bootstrap", clusterCIDR.Name) + if err := r.createClusterCIDR(clusterCIDR, terminating); err != nil { + klog.Errorf("Unable to create ClusterCIDR %s: %v", clusterCIDR.Name, err) + return err + } + + return nil +} + +// createClusterCIDR creates and maps the cidrSets in the cidrMap. +func (r *multiCIDRRangeAllocator) createClusterCIDR(clusterCIDR *networkingv1alpha1.ClusterCIDR, terminating bool) error { + nodeSelector, err := r.nodeSelectorKey(clusterCIDR) + if err != nil { + return fmt.Errorf("unable to get labelSelector key: %w", err) + } + + clusterCIDRSet, err := r.createClusterCIDRSet(clusterCIDR, terminating) + if err != nil { + return fmt.Errorf("invalid ClusterCIDR: %w", err) + } + + if clusterCIDRSet.IPv4CIDRSet == nil && clusterCIDRSet.IPv6CIDRSet == nil { + return errors.New("invalid ClusterCIDR: must provide IPv4 and/or IPv6 config") + } + + if err := r.mapClusterCIDRSet(r.cidrMap, nodeSelector, clusterCIDRSet); err != nil { + return fmt.Errorf("unable to map clusterCIDRSet: %w", err) + } + + // Make a copy so we don't mutate the shared informer cache. + updatedClusterCIDR := clusterCIDR.DeepCopy() + if needToAddFinalizer(clusterCIDR, clusterCIDRFinalizer) { + updatedClusterCIDR.ObjectMeta.Finalizers = append(clusterCIDR.ObjectMeta.Finalizers, clusterCIDRFinalizer) + } + + if updatedClusterCIDR.ResourceVersion == "" { + // Create is only used for creating default ClusterCIDR. + if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Create(context.TODO(), updatedClusterCIDR, metav1.CreateOptions{}); err != nil { + klog.V(2).Infof("Error creating ClusterCIDR %s: %v", clusterCIDR.Name, err) + return err + } + } else { + // Update the ClusterCIDR object when called from reconcileCreate. + if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(context.TODO(), updatedClusterCIDR, metav1.UpdateOptions{}); err != nil { + klog.V(2).Infof("Error creating ClusterCIDR %s: %v", clusterCIDR.Name, err) + return err + } + } + + return nil +} + +// createClusterCIDRSet creates and returns new cidrset.ClusterCIDR based on ClusterCIDR API object. 
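+// One MultiCIDRSet is built per configured IP family, sized by Spec.PerNodeHostBits.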
+func (r *multiCIDRRangeAllocator) createClusterCIDRSet(clusterCIDR *networkingv1alpha1.ClusterCIDR, terminating bool) (*cidrset.ClusterCIDR, error) { + + clusterCIDRSet := &cidrset.ClusterCIDR{ + Name: clusterCIDR.Name, + AssociatedNodes: make(map[string]bool, 0), + Terminating: terminating, + } + + if clusterCIDR.Spec.IPv4 != "" { + _, ipv4CIDR, err := netutil.ParseCIDRSloppy(clusterCIDR.Spec.IPv4) + if err != nil { + return nil, fmt.Errorf("unable to parse provided IPv4 CIDR: %w", err) + } + clusterCIDRSet.IPv4CIDRSet, err = cidrset.NewMultiCIDRSet(ipv4CIDR, int(clusterCIDR.Spec.PerNodeHostBits)) + if err != nil { + return nil, fmt.Errorf("unable to create IPv4 cidrSet: %w", err) + } + } + + if clusterCIDR.Spec.IPv6 != "" { + _, ipv6CIDR, err := netutil.ParseCIDRSloppy(clusterCIDR.Spec.IPv6) + if err != nil { + return nil, fmt.Errorf("unable to parse provided IPv6 CIDR: %w", err) + } + clusterCIDRSet.IPv6CIDRSet, err = cidrset.NewMultiCIDRSet(ipv6CIDR, int(clusterCIDR.Spec.PerNodeHostBits)) + if err != nil { + return nil, fmt.Errorf("unable to create IPv6 cidrSet: %w", err) + } + } + + return clusterCIDRSet, nil +} + +// mapClusterCIDRSet maps the ClusterCIDRSet to the provided labelSelector in the cidrMap. +func (r *multiCIDRRangeAllocator) mapClusterCIDRSet(cidrMap map[string][]*cidrset.ClusterCIDR, nodeSelector string, clusterCIDRSet *cidrset.ClusterCIDR) error { + if clusterCIDRSet == nil { + return errors.New("invalid clusterCIDRSet, clusterCIDRSet cannot be nil") + } + + if clusterCIDRSetList, ok := cidrMap[nodeSelector]; ok { + cidrMap[nodeSelector] = append(clusterCIDRSetList, clusterCIDRSet) + } else { + cidrMap[nodeSelector] = []*cidrset.ClusterCIDR{clusterCIDRSet} + } + return nil +} + +// reconcileDelete deletes the ClusterCIDR object and removes the finalizer. +func (r *multiCIDRRangeAllocator) reconcileDelete(clusterCIDR *networkingv1alpha1.ClusterCIDR) error { + r.lock.Lock() + defer r.lock.Unlock() + + if slice.ContainsString(clusterCIDR.GetFinalizers(), clusterCIDRFinalizer, nil) { + if err := r.deleteClusterCIDR(clusterCIDR); err != nil { + return err + } + // Remove the finalizer as delete is successful. + cccCopy := clusterCIDR.DeepCopy() + cccCopy.ObjectMeta.Finalizers = slice.RemoveString(cccCopy.ObjectMeta.Finalizers, clusterCIDRFinalizer, nil) + if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(context.TODO(), clusterCIDR, metav1.UpdateOptions{}); err != nil { + klog.V(2).Infof("Error removing finalizer for ClusterCIDR %s: %v", clusterCIDR.Name, err) + return err + } + klog.V(2).Infof("Removed finalizer for ClusterCIDR %s", clusterCIDR.Name) + } + return nil +} + +// deleteClusterCIDR Deletes and unmaps the ClusterCIDRs from the cidrMap. +func (r *multiCIDRRangeAllocator) deleteClusterCIDR(clusterCIDR *networkingv1alpha1.ClusterCIDR) error { + + labelSelector, err := r.nodeSelectorKey(clusterCIDR) + if err != nil { + return fmt.Errorf("unable to delete cidr: %w", err) + } + + clusterCIDRSetList, ok := r.cidrMap[labelSelector] + if !ok { + klog.Infof("Label %s not found in CIDRMap, proceeding with delete", labelSelector) + return nil + } + + for i, clusterCIDRSet := range clusterCIDRSetList { + if clusterCIDRSet.Name != clusterCIDR.Name { + continue + } + + // Mark clusterCIDRSet as terminating. + clusterCIDRSet.Terminating = true + + // Allow deletion only if no nodes are associated with the ClusterCIDR. 
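+ // Returning an error here keeps the finalizer in place, so the API object is not removed while nodes still hold CIDRs allocated from this ClusterCIDR.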
+ if len(clusterCIDRSet.AssociatedNodes) > 0 { + return fmt.Errorf("ClusterCIDRSet %s marked as terminating, won't be deleted until all associated nodes are deleted", clusterCIDR.Name) + } + + // Remove the label from the map if this was the only clusterCIDR associated + // with it. + if len(clusterCIDRSetList) == 1 { + delete(r.cidrMap, labelSelector) + return nil + } + + clusterCIDRSetList = append(clusterCIDRSetList[:i], clusterCIDRSetList[i+1:]...) + r.cidrMap[labelSelector] = clusterCIDRSetList + return nil + } + klog.V(2).Info("clusterCIDR not found, proceeding with delete", "Name", clusterCIDR.Name, "label", labelSelector) + return nil +} + +func (r *multiCIDRRangeAllocator) nodeSelectorKey(clusterCIDR *networkingv1alpha1.ClusterCIDR) (string, error) { + var nodeSelector []byte + var err error + + if clusterCIDR.Spec.NodeSelector != nil { + nodeSelector, err = clusterCIDR.Spec.NodeSelector.Marshal() + } else { + nodeSelector, err = defaultNodeSelector() + } + + if err != nil { + return "", err + } + + return string(nodeSelector), nil +} + +func listClusterCIDRs(kubeClient clientset.Interface) (*networkingv1alpha1.ClusterCIDRList, error) { + var clusterCIDRList *networkingv1alpha1.ClusterCIDRList + // We must poll because apiserver might not be up. This error causes + // controller manager to restart. + startTimestamp := time.Now() + + // start with 2s, multiply the duration by 1.6 each step, 11 steps = 9.7 minutes + backoff := wait.Backoff{ + Duration: 2 * time.Second, + Factor: 1.6, + Steps: 11, + } + + if pollErr := wait.ExponentialBackoff(backoff, func() (bool, error) { + var err error + clusterCIDRList, err = kubeClient.NetworkingV1alpha1().ClusterCIDRs().List(context.TODO(), metav1.ListOptions{ + FieldSelector: fields.Everything().String(), + LabelSelector: labels.Everything().String(), + }) + if err != nil { + klog.Errorf("Failed to list all clusterCIDRs: %v", err) + return false, nil + } + return true, nil + }); pollErr != nil { + klog.Errorf("Failed to list clusterCIDRs (after %v)", time.Now().Sub(startTimestamp)) + return nil, fmt.Errorf("failed to list all clusterCIDRs in %v, cannot proceed without updating CIDR map", + apiserverStartupGracePeriod) + } + return clusterCIDRList, nil +} diff --git a/pkg/controller/nodeipam/ipam/multi_cidr_range_allocator_test.go b/pkg/controller/nodeipam/ipam/multi_cidr_range_allocator_test.go new file mode 100644 index 00000000000..6569b0a5d13 --- /dev/null +++ b/pkg/controller/nodeipam/ipam/multi_cidr_range_allocator_test.go @@ -0,0 +1,1868 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package ipam + +import ( + "context" + "fmt" + "net" + "testing" + "time" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + networkingv1alpha1 "k8s.io/api/networking/v1alpha1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/rand" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" + "k8s.io/client-go/tools/cache" + "k8s.io/kubernetes/pkg/controller" + cidrset "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/multicidrset" + "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/test" + "k8s.io/kubernetes/pkg/controller/testutil" + utilnet "k8s.io/utils/net" +) + +type testCaseMultiCIDR struct { + description string + fakeNodeHandler *testutil.FakeNodeHandler + allocatorParams CIDRAllocatorParams + testCIDRMap map[string][]*cidrset.ClusterCIDR + // key is index of the cidr allocated. + expectedAllocatedCIDR map[int]string + allocatedCIDRs map[int][]string + // should controller creation fail? + ctrlCreateFail bool +} + +type testClusterCIDR struct { + perNodeHostBits int32 + ipv4CIDR string + ipv6CIDR string + name string +} + +type testNodeSelectorRequirement struct { + key string + operator v1.NodeSelectorOperator + values []string +} + +func getTestNodeSelector(requirements []testNodeSelectorRequirement) string { + testNodeSelector := &v1.NodeSelector{} + + for _, nsr := range requirements { + nst := v1.NodeSelectorTerm{ + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: nsr.key, + Operator: nsr.operator, + Values: nsr.values, + }, + }, + } + testNodeSelector.NodeSelectorTerms = append(testNodeSelector.NodeSelectorTerms, nst) + } + + marshalledSelector, _ := testNodeSelector.Marshal() + return string(marshalledSelector) +} + +func getTestCidrMap(testClusterCIDRMap map[string][]*testClusterCIDR) map[string][]*cidrset.ClusterCIDR { + cidrMap := make(map[string][]*cidrset.ClusterCIDR, 0) + for labels, testClusterCIDRList := range testClusterCIDRMap { + clusterCIDRList := make([]*cidrset.ClusterCIDR, 0) + for _, testClusterCIDR := range testClusterCIDRList { + clusterCIDR := &cidrset.ClusterCIDR{ + Name: testClusterCIDR.name, + AssociatedNodes: make(map[string]bool, 0), + } + + if testClusterCIDR.ipv4CIDR != "" { + _, testCIDR, _ := utilnet.ParseCIDRSloppy(testClusterCIDR.ipv4CIDR) + testCIDRSet, _ := cidrset.NewMultiCIDRSet(testCIDR, int(testClusterCIDR.perNodeHostBits)) + clusterCIDR.IPv4CIDRSet = testCIDRSet + } + if testClusterCIDR.ipv6CIDR != "" { + _, testCIDR, _ := utilnet.ParseCIDRSloppy(testClusterCIDR.ipv6CIDR) + testCIDRSet, _ := cidrset.NewMultiCIDRSet(testCIDR, int(testClusterCIDR.perNodeHostBits)) + clusterCIDR.IPv6CIDRSet = testCIDRSet + } + clusterCIDRList = append(clusterCIDRList, clusterCIDR) + } + cidrMap[labels] = clusterCIDRList + } + return cidrMap +} + +func getClusterCIDRList(nodeName string, cidrMap map[string][]*cidrset.ClusterCIDR) ([]*cidrset.ClusterCIDR, error) { + labelSelector := getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{nodeName}, + }, + }) + if clusterCIDRList, ok := cidrMap[labelSelector]; ok { + return clusterCIDRList, nil + } + return nil, fmt.Errorf("unable to get clusterCIDR for node: %s", nodeName) +} + +func TestMultiCIDROccupyPreExistingCIDR(t *testing.T) { + // all tests operate on a single node. 
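+ // Each case seeds NewMultiCIDRRangeAllocator with a prebuilt testCIDRMap and only verifies whether construction succeeds or fails while occupying the node's pre-existing PodCIDRs.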
+ testCaseMultiCIDRs := []testCaseMultiCIDR{ + { + description: "success, single stack no node allocation", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "single-stack-cidr", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/16", + }, + }, + }), + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: false, + }, + { + description: "success, dual stack no node allocation", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "dual-stack-cidr", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/16", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + }), + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: false, + }, + { + description: "success, single stack correct node allocation", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.0.1/24"}, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "single-stack-cidr-allocated", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/16", + }, + }, + }), + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: false, + }, + { + description: "success, dual stack both allocated correctly", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.0.1/24", "ace:cab:deca::1/120"}, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "dual-stack-cidr-allocated", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/16", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + }), + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: false, + }, + // failure cases. 
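+ // In each failure case at least one pre-set PodCIDR falls outside the configured ClusterCIDRs, so occupying it during allocator creation is expected to fail.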
+ { + description: "fail, single stack incorrect node allocation", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"172.10.0.1/24"}, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "single-stack-cidr-allocate-fail", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/16", + }, + }, + }), + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: true, + }, + { + description: "fail, dualstack node allocating from non existing cidr", + + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.0.1/24", "a00::/86"}, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "dual-stack-cidr-allocate-fail", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/16", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + }), + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: true, + }, + { + description: "fail, dualstack node allocating bad v4", + + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"172.10.0.1/24", "ace:cab:deca::1/120"}, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "dual-stack-cidr-bad-v4", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/16", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + }), + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: true, + }, + { + description: "fail, dualstack node allocating bad v6", + + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.0.1/24", "cdd::/86"}, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "dual-stack-cidr-bad-v6", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/16", + ipv6CIDR: 
"ace:cab:deca::/112", + }, + }, + }), + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: true, + }, + } + + // test function + for _, tc := range testCaseMultiCIDRs { + t.Run(tc.description, func(t *testing.T) { + // Initialize the range allocator. + fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler) + fakeClient := &fake.Clientset{} + fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) + fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs() + nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{}) + + _, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap) + if err == nil && tc.ctrlCreateFail { + t.Fatalf("creating range allocator was expected to fail, but it did not") + } + if err != nil && !tc.ctrlCreateFail { + t.Fatalf("creating range allocator was expected to succeed, but it did not") + } + }) + } +} + +func TestMultiCIDRAllocateOrOccupyCIDRSuccess(t *testing.T) { + // Non-parallel test (overrides global var). + oldNodePollInterval := nodePollInterval + nodePollInterval = test.NodePollInterval + defer func() { + nodePollInterval = oldNodePollInterval + }() + + // all tests operate on a single node. + testCaseMultiCIDRs := []testCaseMultiCIDR{ + { + description: "When there's no ServiceCIDR return first CIDR in range", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "single-stack-cidr", + perNodeHostBits: 2, + ipv4CIDR: "127.123.234.0/24", + }, + }, + }), + expectedAllocatedCIDR: map[int]string{ + 0: "127.123.234.0/30", + }, + }, + { + description: "Correctly filter out ServiceCIDR", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + NodeCIDRMaskSizes: []int{30}, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "single-stack-cidr", + perNodeHostBits: 2, + ipv4CIDR: "127.123.234.0/24", + }, + }, + }), + // it should return first /30 CIDR after service range. 
+ expectedAllocatedCIDR: map[int]string{ + 0: "127.123.234.64/30", + }, + }, + { + description: "Correctly ignore already allocated CIDRs", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "single-stack-cidr", + perNodeHostBits: 2, + ipv4CIDR: "127.123.234.0/24", + }, + }, + }), + allocatedCIDRs: map[int][]string{ + 0: {"127.123.234.64/30", "127.123.234.68/30", "127.123.234.72/30", "127.123.234.80/30"}, + }, + expectedAllocatedCIDR: map[int]string{ + 0: "127.123.234.76/30", + }, + }, + { + description: "Dualstack CIDRs, prioritize clusterCIDR with higher label match count", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + "testLabel-1": "label1", + "testLabel-2": "label2", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "dual-stack-cidr-1", + perNodeHostBits: 8, + ipv4CIDR: "10.0.0.0/8", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-1", + operator: v1.NodeSelectorOpIn, + values: []string{"label1"}, + }, + }): { + { + name: "dual-stack-cidr-2", + perNodeHostBits: 8, + ipv4CIDR: "127.123.234.0/8", + ipv6CIDR: "abc:def:deca::/112", + }, + }, + }), + expectedAllocatedCIDR: map[int]string{ + 0: "127.0.0.0/24", + 1: "abc:def:deca::/120", + }, + }, + { + description: "Dualstack CIDRs, prioritize clusterCIDR with higher label match count, overlapping CIDRs", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + "testLabel-1": "label1", + "testLabel-2": "label2", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "dual-stack-cidr-1", + perNodeHostBits: 8, + ipv4CIDR: "10.0.0.0/8", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + getTestNodeSelector([]testNodeSelectorRequirement{ + { + 
key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-1", + operator: v1.NodeSelectorOpIn, + values: []string{"label1"}, + }, + }): { + { + name: "dual-stack-cidr-2", + perNodeHostBits: 8, + ipv4CIDR: "10.0.0.0/16", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + }), + allocatedCIDRs: map[int][]string{ + 0: {"10.0.0.0/24", "10.0.1.0/24", "10.0.2.0/24", "10.0.4.0/24"}, + 1: {"ace:cab:deca::/120"}, + }, + expectedAllocatedCIDR: map[int]string{ + 0: "10.0.3.0/24", + 1: "ace:cab:deca::100/120", + }, + }, + { + description: "Dualstack CIDRs, clusterCIDR with equal label match count, prioritize clusterCIDR with fewer allocatable pod CIDRs", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + "testLabel-1": "label1", + "testLabel-2": "label2", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-1", + operator: v1.NodeSelectorOpIn, + values: []string{"label1"}, + }, + }): { + { + name: "dual-stack-cidr-1", + perNodeHostBits: 8, + ipv4CIDR: "127.123.234.0/8", + ipv6CIDR: "abc:def:deca::/112", + }, + }, + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-2", + operator: v1.NodeSelectorOpIn, + values: []string{"label2"}, + }, + }): { + { + name: "dual-stack-cidr-2", + perNodeHostBits: 8, + ipv4CIDR: "10.0.0.0/24", + ipv6CIDR: "ace:cab:deca::/120", + }, + }, + }), + expectedAllocatedCIDR: map[int]string{ + 0: "10.0.0.0/24", + 1: "ace:cab:deca::/120", + }, + }, + { + description: "Dualstack CIDRs, clusterCIDR with equal label count, non comparable allocatable pod CIDRs, prioritize clusterCIDR with lower perNodeMaskSize", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + "testLabel-1": "label1", + "testLabel-2": "label2", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-1", + operator: v1.NodeSelectorOpIn, + values: []string{"label1"}, + }, + }): { + { + name: "dual-stack-cidr-1", + perNodeHostBits: 8, + ipv4CIDR: "127.123.234.0/23", + }, + }, + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-2", + operator: v1.NodeSelectorOpIn, + values: []string{"label2"}, + }, + }): { + { + name: "dual-stack-cidr-2", + perNodeHostBits: 8, + ipv4CIDR: "10.0.0.0/16", + ipv6CIDR: 
"ace:cab:deca::/120", + }, + }, + }), + expectedAllocatedCIDR: map[int]string{ + 0: "10.0.0.0/24", + 1: "ace:cab:deca::/120", + }, + }, + { + description: "Dualstack CIDRs, clusterCIDR with equal label count and allocatable pod CIDRs, prioritize clusterCIDR with lower perNodeMaskSize", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + "testLabel-1": "label1", + "testLabel-2": "label2", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-1", + operator: v1.NodeSelectorOpIn, + values: []string{"label1"}, + }, + }): { + { + name: "dual-stack-cidr-1", + perNodeHostBits: 8, + ipv4CIDR: "127.123.234.0/24", + ipv6CIDR: "abc:def:deca::/120", + }, + }, + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-2", + operator: v1.NodeSelectorOpIn, + values: []string{"label2"}, + }, + }): { + { + name: "dual-stack-cidr-2", + perNodeHostBits: 0, + ipv4CIDR: "10.0.0.0/32", + ipv6CIDR: "ace:cab:deca::/128", + }, + }, + }), + expectedAllocatedCIDR: map[int]string{ + 0: "10.0.0.0/32", + 1: "ace:cab:deca::/128", + }, + }, + { + description: "Dualstack CIDRs, clusterCIDR with equal label count, allocatable pod CIDRs and allocatable IPs, prioritize clusterCIDR with lower alphanumeric label", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + "testLabel-1": "label1", + "testLabel-2": "label2", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-1", + operator: v1.NodeSelectorOpIn, + values: []string{"label1"}, + }, + }): { + { + name: "dual-stack-cidr-1", + perNodeHostBits: 8, + ipv4CIDR: "127.123.234.0/16", + ipv6CIDR: "abc:def:deca::/112", + }, + }, + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-2", + operator: v1.NodeSelectorOpIn, + values: []string{"label2"}, + }, + }): { + { + name: "dual-stack-cidr-2", + perNodeHostBits: 8, + ipv4CIDR: "10.0.0.0/16", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + }), + expectedAllocatedCIDR: map[int]string{ + 0: "127.123.0.0/24", + 1: "abc:def:deca::/120", + }, + }, + { + description: "Dualstack CIDRs, clusterCIDR with equal label count, allocatable pod CIDRs, allocatable IPs and labels, prioritize clusterCIDR with smaller IP", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + 
ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + "testLabel-1": "label1", + "testLabel-2": "label2", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-1", + operator: v1.NodeSelectorOpIn, + values: []string{"label1"}, + }, + }): { + { + name: "dual-stack-cidr-1", + perNodeHostBits: 8, + ipv4CIDR: "127.123.234.0/16", + ipv6CIDR: "abc:def:deca::/112", + }, + }, + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-1", + operator: v1.NodeSelectorOpIn, + values: []string{"label1"}, + }, + }): { + { + name: "dual-stack-cidr-2", + perNodeHostBits: 8, + ipv4CIDR: "10.0.0.0/16", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + }), + expectedAllocatedCIDR: map[int]string{ + 0: "10.0.0.0/24", + 1: "ace:cab:deca::/120", + }, + }, + { + description: "no double counting", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "nodepool1", + }, + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.0.0/24"}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Labels: map[string]string{ + "testLabel-0": "nodepool1", + }, + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.2.0/24"}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + Labels: map[string]string{ + "testLabel-0": "nodepool1", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"nodepool1"}, + }, + }): { + { + name: "no-double-counting", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/22", + }, + }, + }), + expectedAllocatedCIDR: map[int]string{ + 0: "10.10.1.0/24", + }, + }, + } + + // test function + testFunc := func(tc testCaseMultiCIDR) { + nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{}) + // Initialize the range allocator. 
+ + fakeClient := &fake.Clientset{} + fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) + fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs() + allocator, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap) + if err != nil { + t.Errorf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err) + return + } + rangeAllocator, ok := allocator.(*multiCIDRRangeAllocator) + if !ok { + t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) + return + } + rangeAllocator.nodesSynced = test.AlwaysReady + rangeAllocator.recorder = testutil.NewFakeRecorder() + + // this is a bit of white box testing + // pre allocate the CIDRs as per the test + for _, allocatedList := range tc.allocatedCIDRs { + for _, allocated := range allocatedList { + _, cidr, err := utilnet.ParseCIDRSloppy(allocated) + if err != nil { + t.Fatalf("%v: unexpected error when parsing CIDR %v: %v", tc.description, allocated, err) + } + + clusterCIDRList, err := getClusterCIDRList("node0", rangeAllocator.cidrMap) + if err != nil { + t.Fatalf("%v: unexpected error when getting associated clusterCIDR for node %v %v", tc.description, "node0", err) + } + + occupied := false + for _, clusterCIDR := range clusterCIDRList { + if err := rangeAllocator.Occupy(clusterCIDR, cidr); err == nil { + occupied = true + break + } + } + if !occupied { + t.Fatalf("%v: unable to occupy CIDR %v", tc.description, allocated) + } + } + } + + updateCount := 0 + for _, node := range tc.fakeNodeHandler.Existing { + if node.Spec.PodCIDRs == nil { + updateCount++ + } + if err := allocator.AllocateOrOccupyCIDR(node); err != nil { + t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) + } + } + if updateCount != 1 { + t.Fatalf("test error: all tests must update exactly one node") + } + if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, updateCount, wait.ForeverTestTimeout); err != nil { + t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) + } + + if len(tc.expectedAllocatedCIDR) == 0 { + // nothing further expected + return + } + for _, updatedNode := range tc.fakeNodeHandler.GetUpdatedNodesCopy() { + if len(updatedNode.Spec.PodCIDRs) == 0 { + continue // not assigned yet + } + //match + for podCIDRIdx, expectedPodCIDR := range tc.expectedAllocatedCIDR { + if updatedNode.Spec.PodCIDRs[podCIDRIdx] != expectedPodCIDR { + t.Errorf("%v: Unable to find allocated CIDR %v, found updated Nodes with CIDRs: %v", tc.description, expectedPodCIDR, updatedNode.Spec.PodCIDRs) + break + } + } + } + } + + // run the test cases + for _, tc := range testCaseMultiCIDRs { + testFunc(tc) + } +} + +func TestMultiCIDRAllocateOrOccupyCIDRFailure(t *testing.T) { + testCaseMultiCIDRs := []testCaseMultiCIDR{ + { + description: "When there's no ServiceCIDR return first CIDR in range", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", 
+ operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "allocate-fail", + perNodeHostBits: 2, + ipv4CIDR: "127.123.234.0/28", + }, + }, + }), + allocatedCIDRs: map[int][]string{ + 0: {"127.123.234.0/30", "127.123.234.4/30", "127.123.234.8/30", "127.123.234.12/30"}, + }, + }, + } + + testFunc := func(tc testCaseMultiCIDR) { + fakeClient := &fake.Clientset{} + fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) + fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs() + + // Initialize the range allocator. + allocator, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap) + if err != nil { + t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err) + } + rangeAllocator, ok := allocator.(*multiCIDRRangeAllocator) + if !ok { + t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) + return + } + rangeAllocator.nodesSynced = test.AlwaysReady + rangeAllocator.recorder = testutil.NewFakeRecorder() + + // this is a bit of white box testing + // pre allocate the CIDRs as per the test + for _, allocatedList := range tc.allocatedCIDRs { + for _, allocated := range allocatedList { + _, cidr, err := utilnet.ParseCIDRSloppy(allocated) + if err != nil { + t.Fatalf("%v: unexpected error when parsing CIDR %v: %v", tc.description, allocated, err) + } + + clusterCIDRList, err := getClusterCIDRList("node0", rangeAllocator.cidrMap) + if err != nil { + t.Fatalf("%v: unexpected error when getting associated clusterCIDR for node %v %v", tc.description, "node0", err) + } + + occupied := false + for _, clusterCIDR := range clusterCIDRList { + if err := rangeAllocator.Occupy(clusterCIDR, cidr); err == nil { + occupied = true + break + } + } + if !occupied { + t.Fatalf("%v: unable to occupy CIDR %v", tc.description, allocated) + } + } + } + + if err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err == nil { + t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err) + } + // We don't expect any updates, so just sleep for some time + time.Sleep(time.Second) + if len(tc.fakeNodeHandler.GetUpdatedNodesCopy()) != 0 { + t.Fatalf("%v: unexpected update of nodes: %v", tc.description, tc.fakeNodeHandler.GetUpdatedNodesCopy()) + } + if len(tc.expectedAllocatedCIDR) == 0 { + // nothing further expected + return + } + for _, updatedNode := range tc.fakeNodeHandler.GetUpdatedNodesCopy() { + if len(updatedNode.Spec.PodCIDRs) == 0 { + continue // not assigned yet + } + //match + for podCIDRIdx, expectedPodCIDR := range tc.expectedAllocatedCIDR { + if updatedNode.Spec.PodCIDRs[podCIDRIdx] == expectedPodCIDR { + t.Errorf("%v: found cidr %v that should not be allocated on node with CIDRs:%v", tc.description, expectedPodCIDR, updatedNode.Spec.PodCIDRs) + break + } + } + } + } + for _, tc := range testCaseMultiCIDRs { + testFunc(tc) + } +} + +type releasetestCaseMultiCIDR struct { + description string + fakeNodeHandler *testutil.FakeNodeHandler + testCIDRMap map[string][]*cidrset.ClusterCIDR + allocatorParams CIDRAllocatorParams + expectedAllocatedCIDRFirstRound map[int]string + expectedAllocatedCIDRSecondRound map[int]string + allocatedCIDRs map[int][]string + cidrsToRelease [][]string +} + +func TestMultiCIDRReleaseCIDRSuccess(t *testing.T) { + // Non-parallel test (overrides global var) + 
oldNodePollInterval := nodePollInterval + nodePollInterval = test.NodePollInterval + defer func() { + nodePollInterval = oldNodePollInterval + }() + + testCaseMultiCIDRs := []releasetestCaseMultiCIDR{ + { + description: "Correctly release preallocated CIDR", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "cidr-release", + perNodeHostBits: 2, + ipv4CIDR: "127.123.234.0/28", + }, + }, + }), + allocatedCIDRs: map[int][]string{ + 0: {"127.123.234.0/30", "127.123.234.4/30", "127.123.234.8/30", "127.123.234.12/30"}, + }, + expectedAllocatedCIDRFirstRound: nil, + cidrsToRelease: [][]string{ + {"127.123.234.4/30"}, + }, + expectedAllocatedCIDRSecondRound: map[int]string{ + 0: "127.123.234.4/30", + }, + }, + { + description: "Correctly recycle CIDR", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "cidr-release", + perNodeHostBits: 2, + ipv4CIDR: "127.123.234.0/28", + }, + }, + }), + allocatedCIDRs: map[int][]string{ + 0: {"127.123.234.4/30", "127.123.234.8/30", "127.123.234.12/30"}, + }, + expectedAllocatedCIDRFirstRound: map[int]string{ + 0: "127.123.234.0/30", + }, + cidrsToRelease: [][]string{ + {"127.123.234.0/30"}, + }, + expectedAllocatedCIDRSecondRound: map[int]string{ + 0: "127.123.234.0/30", + }, + }, + } + + testFunc := func(tc releasetestCaseMultiCIDR) { + fakeClient := &fake.Clientset{} + fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) + fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs() + // Initialize the range allocator. 
+ allocator, _ := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap) + rangeAllocator, ok := allocator.(*multiCIDRRangeAllocator) + if !ok { + t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) + return + } + rangeAllocator.nodesSynced = test.AlwaysReady + rangeAllocator.recorder = testutil.NewFakeRecorder() + + // this is a bit of white box testing + for _, allocatedList := range tc.allocatedCIDRs { + for _, allocated := range allocatedList { + _, cidr, err := utilnet.ParseCIDRSloppy(allocated) + if err != nil { + t.Fatalf("%v: unexpected error when parsing CIDR %v: %v", tc.description, allocated, err) + } + + clusterCIDRList, err := getClusterCIDRList("node0", rangeAllocator.cidrMap) + if err != nil { + t.Fatalf("%v: unexpected error when getting associated clusterCIDR for node %v %v", tc.description, "node0", err) + } + + occupied := false + for _, clusterCIDR := range clusterCIDRList { + if err := rangeAllocator.Occupy(clusterCIDR, cidr); err == nil { + occupied = true + clusterCIDR.AssociatedNodes["fakeNode"] = true + break + } + } + if !occupied { + t.Fatalf("%v: unable to occupy CIDR %v", tc.description, allocated) + } + } + } + + err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]) + if len(tc.expectedAllocatedCIDRFirstRound) != 0 { + if err != nil { + t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) + } + if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { + t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) + } + } else { + if err == nil { + t.Fatalf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err) + } + // We don't expect any updates here + time.Sleep(time.Second) + if len(tc.fakeNodeHandler.GetUpdatedNodesCopy()) != 0 { + t.Fatalf("%v: unexpected update of nodes: %v", tc.description, tc.fakeNodeHandler.GetUpdatedNodesCopy()) + } + } + + for _, cidrToRelease := range tc.cidrsToRelease { + + nodeToRelease := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fakeNode", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + } + nodeToRelease.Spec.PodCIDRs = cidrToRelease + err = allocator.ReleaseCIDR(&nodeToRelease) + if err != nil { + t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err) + } + } + if err = allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil { + t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) + } + if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { + t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) + } + + if len(tc.expectedAllocatedCIDRSecondRound) == 0 { + // nothing further expected + return + } + for _, updatedNode := range tc.fakeNodeHandler.GetUpdatedNodesCopy() { + if len(updatedNode.Spec.PodCIDRs) == 0 { + continue // not assigned yet + } + //match + for podCIDRIdx, expectedPodCIDR := range tc.expectedAllocatedCIDRSecondRound { + if updatedNode.Spec.PodCIDRs[podCIDRIdx] != expectedPodCIDR { + t.Errorf("%v: found cidr %v that should not be allocated on node with CIDRs:%v", tc.description, expectedPodCIDR, updatedNode.Spec.PodCIDRs) + break + } + } + } + } + + for _, tc := range testCaseMultiCIDRs { + testFunc(tc) + } +} + +// ClusterCIDR tests. 
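+//
+// The tests below exercise the ClusterCIDR reconciler of the
+// multiCIDRRangeAllocator against a fake clientset; the create and update
+// reactors registered in newController stand in for behavior a real
+// API server would provide.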
+ +var alwaysReady = func() bool { return true } + +type clusterCIDRController struct { + *multiCIDRRangeAllocator + clusterCIDRStore cache.Store +} + +func newController() (*fake.Clientset, *clusterCIDRController) { + client := fake.NewSimpleClientset() + + informerFactory := informers.NewSharedInformerFactory(client, controller.NoResyncPeriodFunc()) + cccInformer := informerFactory.Networking().V1alpha1().ClusterCIDRs() + cccIndexer := cccInformer.Informer().GetIndexer() + + nodeInformer := informerFactory.Core().V1().Nodes() + + // These reactors are required to mock functionality that would be covered + // automatically if we weren't using the fake client. + client.PrependReactor("create", "clustercidrs", k8stesting.ReactionFunc(func(action k8stesting.Action) (bool, runtime.Object, error) { + clusterCIDR := action.(k8stesting.CreateAction).GetObject().(*networkingv1alpha1.ClusterCIDR) + + if clusterCIDR.ObjectMeta.GenerateName != "" { + clusterCIDR.ObjectMeta.Name = fmt.Sprintf("%s-%s", clusterCIDR.ObjectMeta.GenerateName, rand.String(8)) + clusterCIDR.ObjectMeta.GenerateName = "" + } + clusterCIDR.Generation = 1 + cccIndexer.Add(clusterCIDR) + + return false, clusterCIDR, nil + })) + client.PrependReactor("update", "clustercidrs", k8stesting.ReactionFunc(func(action k8stesting.Action) (bool, runtime.Object, error) { + clusterCIDR := action.(k8stesting.CreateAction).GetObject().(*networkingv1alpha1.ClusterCIDR) + clusterCIDR.Generation++ + cccIndexer.Update(clusterCIDR) + + return false, clusterCIDR, nil + })) + + _, clusterCIDR, _ := utilnet.ParseCIDRSloppy("192.168.0.0/16") + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("10.1.0.0/16") + + allocatorParams := CIDRAllocatorParams{ + ClusterCIDRs: []*net.IPNet{clusterCIDR}, + ServiceCIDR: serviceCIDR, + SecondaryServiceCIDR: nil, + NodeCIDRMaskSizes: []int{24}, + } + testCIDRMap := make(map[string][]*cidrset.ClusterCIDR, 0) + + // Initialize the range allocator. + ra, _ := NewMultiCIDRRangeAllocator(client, nodeInformer, cccInformer, allocatorParams, nil, testCIDRMap) + cccController := ra.(*multiCIDRRangeAllocator) + + cccController.clusterCIDRSynced = alwaysReady + + return client, &clusterCIDRController{ + cccController, + informerFactory.Networking().V1alpha1().ClusterCIDRs().Informer().GetStore(), + } +} + +// Ensure default ClusterCIDR is created during bootstrap. +func TestClusterCIDRDefault(t *testing.T) { + defaultCCC := makeClusterCIDR(defaultClusterCIDRName, "192.168.0.0/16", "", 8, nil) + + client, _ := newController() + createdCCC, err := client.NetworkingV1alpha1().ClusterCIDRs().Get(context.TODO(), defaultClusterCIDRName, metav1.GetOptions{}) + assert.Nil(t, err, "Expected no error getting clustercidr objects") + assert.Equal(t, defaultCCC.Spec, createdCCC.Spec) +} + +// Ensure SyncClusterCIDR creates a new valid ClusterCIDR. 
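+// Invalid ClusterCIDR specs are expected to surface an error from syncClusterCIDR.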
+func TestSyncClusterCIDRCreate(t *testing.T) { + tests := []struct { + name string + ccc *networkingv1alpha1.ClusterCIDR + wantErr bool + }{ + { + name: "valid IPv4 ClusterCIDR with no NodeSelector", + ccc: makeClusterCIDR("ipv4-ccc", "10.2.0.0/16", "", 8, nil), + wantErr: false, + }, + { + name: "valid IPv4 ClusterCIDR with NodeSelector", + ccc: makeClusterCIDR("ipv4-ccc-label", "10.3.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})), + wantErr: false, + }, + { + name: "valid IPv4 ClusterCIDR with overlapping CIDRs", + ccc: makeClusterCIDR("ipv4-ccc-overlap", "10.2.0.0/24", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})), + wantErr: false, + }, + { + name: "valid IPv6 ClusterCIDR with no NodeSelector", + ccc: makeClusterCIDR("ipv6-ccc", "", "fd00:1::/112", 8, nil), + wantErr: false, + }, + { + name: "valid IPv6 ClusterCIDR with NodeSelector", + ccc: makeClusterCIDR("ipv6-ccc-label", "", "fd00:2::/112", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})), + wantErr: false, + }, + { + name: "valid IPv6 ClusterCIDR with overlapping CIDRs", + ccc: makeClusterCIDR("ipv6-ccc-overlap", "", "fd00:1:1::/112", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})), + wantErr: false, + }, + { + name: "valid Dualstack ClusterCIDR with no NodeSelector", + ccc: makeClusterCIDR("dual-ccc", "10.2.0.0/16", "fd00:1::/112", 8, nil), + wantErr: false, + }, + { + name: "valid DualStack ClusterCIDR with NodeSelector", + ccc: makeClusterCIDR("dual-ccc-label", "10.3.0.0/16", "fd00:2::/112", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})), + wantErr: false, + }, + { + name: "valid Dualstack ClusterCIDR with overlapping CIDRs", + ccc: makeClusterCIDR("dual-ccc-overlap", "10.2.0.0/16", "fd00:1:1::/112", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})), + wantErr: false, + }, + // invalid ClusterCIDRs. + { + name: "invalid ClusterCIDR with both IPv4 and IPv6 CIDRs nil", + ccc: makeClusterCIDR("invalid-ccc", "", "", 0, nil), + wantErr: true, + }, + { + name: "invalid IPv4 ClusterCIDR", + ccc: makeClusterCIDR("invalid-ipv4-ccc", "1000.2.0.0/16", "", 8, nil), + wantErr: true, + }, + { + name: "invalid IPv6 ClusterCIDR", + ccc: makeClusterCIDR("invalid-ipv6-ccc", "", "aaaaa:1:1::/112", 8, nil), + wantErr: true, + }, + { + name: "invalid dualstack ClusterCIDR", + ccc: makeClusterCIDR("invalid-dual-ccc", "10.2.0.0/16", "aaaaa:1:1::/112", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})), + wantErr: true, + }, + } + + client, cccController := newController() + for _, tc := range tests { + cccController.clusterCIDRStore.Add(tc.ccc) + err := cccController.syncClusterCIDR(tc.ccc.Name) + if tc.wantErr { + assert.Error(t, err) + continue + } + assert.NoError(t, err) + expectActions(t, client.Actions(), 1, "create", "clustercidrs") + + createdCCC, err := client.NetworkingV1alpha1().ClusterCIDRs().Get(context.TODO(), tc.ccc.Name, metav1.GetOptions{}) + assert.Nil(t, err, "Expected no error getting clustercidr object") + assert.Equal(t, tc.ccc.Spec, createdCCC.Spec) + assert.Equal(t, []string{clusterCIDRFinalizer}, createdCCC.Finalizers) + } +} + +// Ensure syncClusterCIDR for ClusterCIDR delete removes the ClusterCIDR. 
+func TestSyncClusterCIDRDelete(t *testing.T) {
+	_, cccController := newController()
+
+	testCCC := makeClusterCIDR("testing-1", "10.1.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"}))
+
+	cccController.clusterCIDRStore.Add(testCCC)
+	err := cccController.syncClusterCIDR(testCCC.Name)
+	assert.NoError(t, err)
+
+	deletionTimestamp := metav1.Now()
+	testCCC.DeletionTimestamp = &deletionTimestamp
+	cccController.clusterCIDRStore.Update(testCCC)
+	err = cccController.syncClusterCIDR(testCCC.Name)
+	assert.NoError(t, err)
+}
+
+// Ensure syncClusterCIDR for ClusterCIDR delete does not remove the ClusterCIDR
+// if a node is still associated with it.
+func TestSyncClusterCIDRDeleteWithNodesAssociated(t *testing.T) {
+	client, cccController := newController()
+
+	testCCC := makeClusterCIDR("testing-1", "10.1.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"}))
+
+	cccController.clusterCIDRStore.Add(testCCC)
+	err := cccController.syncClusterCIDR(testCCC.Name)
+	assert.NoError(t, err)
+
+	// Mock the IPAM controller behavior associating a node with the ClusterCIDR.
+	nodeSelectorKey, _ := cccController.nodeSelectorKey(testCCC)
+	clusterCIDRs := cccController.cidrMap[nodeSelectorKey]
+	clusterCIDRs[0].AssociatedNodes["test-node"] = true
+
+	createdCCC, err := client.NetworkingV1alpha1().ClusterCIDRs().Get(context.TODO(), testCCC.Name, metav1.GetOptions{})
+	assert.Nil(t, err, "Expected no error getting clustercidr object")
+
+	deletionTimestamp := metav1.Now()
+	createdCCC.DeletionTimestamp = &deletionTimestamp
+	cccController.clusterCIDRStore.Update(createdCCC)
+	err = cccController.syncClusterCIDR(createdCCC.Name)
+	assert.Error(t, err, fmt.Sprintf("ClusterCIDR %s marked as terminating, won't be deleted until all associated nodes are deleted", createdCCC.Name))
+}
+
+func expectActions(t *testing.T, actions []k8stesting.Action, num int, verb, resource string) {
+	t.Helper()
+	// If fewer actions than expected were recorded, the indexing below would panic.
+	if num > len(actions) {
+		t.Fatalf("unexpected number of actions %v, expected at least %v", len(actions), num)
+	}
+
+	for i := 0; i < num; i++ {
+		relativePos := len(actions) - i - 1
+		assert.Equal(t, verb, actions[relativePos].GetVerb(), "Expected action -%d verb to be %s", i, verb)
+		assert.Equal(t, resource, actions[relativePos].GetResource().Resource, "Expected action -%d resource to be %s", i, resource)
+	}
+}
+
+func makeNodeSelector(key string, op v1.NodeSelectorOperator, values []string) *v1.NodeSelector {
+	return &v1.NodeSelector{
+		NodeSelectorTerms: []v1.NodeSelectorTerm{
+			{
+				MatchExpressions: []v1.NodeSelectorRequirement{
+					{
+						Key:      key,
+						Operator: op,
+						Values:   values,
+					},
+				},
+			},
+		},
+	}
+}
+
+// makeClusterCIDR returns a mock ClusterCIDR object.
+func makeClusterCIDR(cccName, ipv4CIDR, ipv6CIDR string, perNodeHostBits int32, nodeSelector *v1.NodeSelector) *networkingv1alpha1.ClusterCIDR { + testCCC := &networkingv1alpha1.ClusterCIDR{ + ObjectMeta: metav1.ObjectMeta{Name: cccName}, + Spec: networkingv1alpha1.ClusterCIDRSpec{}, + } + + testCCC.Spec.PerNodeHostBits = perNodeHostBits + + if ipv4CIDR != "" { + testCCC.Spec.IPv4 = ipv4CIDR + } + + if ipv6CIDR != "" { + testCCC.Spec.IPv6 = ipv6CIDR + } + + if nodeSelector != nil { + testCCC.Spec.NodeSelector = nodeSelector + } + + return testCCC +} diff --git a/pkg/controller/nodeipam/ipam/range_allocator.go b/pkg/controller/nodeipam/ipam/range_allocator.go index 3cf6794b228..54a0db9f51f 100644 --- a/pkg/controller/nodeipam/ipam/range_allocator.go +++ b/pkg/controller/nodeipam/ipam/range_allocator.go @@ -41,13 +41,6 @@ import ( controllerutil "k8s.io/kubernetes/pkg/controller/util/node" ) -// cidrs are reserved, then node resource is patched with them -// this type holds the reservation info for a node -type nodeReservedCIDRs struct { - allocatedCIDRs []*net.IPNet - nodeName string -} - type rangeAllocator struct { client clientset.Interface // cluster cidrs as passed in during controller creation @@ -333,7 +326,7 @@ func (r *rangeAllocator) updateCIDRsAllocation(data nodeReservedCIDRs) error { var err error var node *v1.Node defer r.removeNodeFromProcessing(data.nodeName) - cidrsString := cidrsAsString(data.allocatedCIDRs) + cidrsString := ipnetToStringList(data.allocatedCIDRs) node, err = r.nodeLister.Get(data.nodeName) if err != nil { klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDRs: %v", data.nodeName, err) @@ -391,12 +384,3 @@ func (r *rangeAllocator) updateCIDRsAllocation(data nodeReservedCIDRs) error { } return err } - -// converts a slice of cidrs into ,, -func cidrsAsString(inCIDRs []*net.IPNet) []string { - outCIDRs := make([]string, len(inCIDRs)) - for idx, inCIDR := range inCIDRs { - outCIDRs[idx] = inCIDR.String() - } - return outCIDRs -} diff --git a/pkg/controller/nodeipam/ipam/range_allocator_test.go b/pkg/controller/nodeipam/ipam/range_allocator_test.go index 0e7c452e01f..b0dd1c32f47 100644 --- a/pkg/controller/nodeipam/ipam/range_allocator_test.go +++ b/pkg/controller/nodeipam/ipam/range_allocator_test.go @@ -25,40 +25,12 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/informers" - coreinformers "k8s.io/client-go/informers/core/v1" "k8s.io/client-go/kubernetes/fake" - "k8s.io/kubernetes/pkg/controller" + "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/test" "k8s.io/kubernetes/pkg/controller/testutil" netutils "k8s.io/utils/net" ) -const testNodePollInterval = 10 * time.Millisecond - -var alwaysReady = func() bool { return true } - -func waitForUpdatedNodeWithTimeout(nodeHandler *testutil.FakeNodeHandler, number int, timeout time.Duration) error { - return wait.Poll(nodePollInterval, timeout, func() (bool, error) { - if len(nodeHandler.GetUpdatedNodesCopy()) >= number { - return true, nil - } - return false, nil - }) -} - -// Creates a fakeNodeInformer using the provided fakeNodeHandler. 
-func getFakeNodeInformer(fakeNodeHandler *testutil.FakeNodeHandler) coreinformers.NodeInformer { - fakeClient := &fake.Clientset{} - fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) - fakeNodeInformer := fakeInformerFactory.Core().V1().Nodes() - - for _, node := range fakeNodeHandler.Existing { - fakeNodeInformer.Informer().GetStore().Add(node) - } - - return fakeNodeInformer -} - type testCase struct { description string fakeNodeHandler *testutil.FakeNodeHandler @@ -305,7 +277,7 @@ func TestOccupyPreExistingCIDR(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { // Initialize the range allocator. - fakeNodeInformer := getFakeNodeInformer(tc.fakeNodeHandler) + fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler) nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{}) _, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList) if err == nil && tc.ctrlCreateFail { @@ -321,7 +293,7 @@ func TestOccupyPreExistingCIDR(t *testing.T) { func TestAllocateOrOccupyCIDRSuccess(t *testing.T) { // Non-parallel test (overrides global var) oldNodePollInterval := nodePollInterval - nodePollInterval = testNodePollInterval + nodePollInterval = test.NodePollInterval defer func() { nodePollInterval = oldNodePollInterval }() @@ -537,7 +509,7 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) { // test function testFunc := func(tc testCase) { - fakeNodeInformer := getFakeNodeInformer(tc.fakeNodeHandler) + fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler) nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{}) // Initialize the range allocator. allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList) @@ -550,7 +522,7 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) { t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) return } - rangeAllocator.nodesSynced = alwaysReady + rangeAllocator.nodesSynced = test.AlwaysReady rangeAllocator.recorder = testutil.NewFakeRecorder() go allocator.Run(wait.NeverStop) @@ -580,7 +552,7 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) { if updateCount != 1 { t.Fatalf("test error: all tests must update exactly one node") } - if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, updateCount, wait.ForeverTestTimeout); err != nil { + if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, updateCount, wait.ForeverTestTimeout); err != nil { t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) } @@ -639,7 +611,7 @@ func TestAllocateOrOccupyCIDRFailure(t *testing.T) { testFunc := func(tc testCase) { // Initialize the range allocator. 
- allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, getFakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil) + allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil) if err != nil { t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err) } @@ -648,7 +620,7 @@ func TestAllocateOrOccupyCIDRFailure(t *testing.T) { t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) return } - rangeAllocator.nodesSynced = alwaysReady + rangeAllocator.nodesSynced = test.AlwaysReady rangeAllocator.recorder = testutil.NewFakeRecorder() go allocator.Run(wait.NeverStop) @@ -708,7 +680,7 @@ type releaseTestCase struct { func TestReleaseCIDRSuccess(t *testing.T) { // Non-parallel test (overrides global var) oldNodePollInterval := nodePollInterval - nodePollInterval = testNodePollInterval + nodePollInterval = test.NodePollInterval defer func() { nodePollInterval = oldNodePollInterval }() @@ -784,13 +756,13 @@ func TestReleaseCIDRSuccess(t *testing.T) { testFunc := func(tc releaseTestCase) { // Initialize the range allocator. - allocator, _ := NewCIDRRangeAllocator(tc.fakeNodeHandler, getFakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil) + allocator, _ := NewCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil) rangeAllocator, ok := allocator.(*rangeAllocator) if !ok { t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) return } - rangeAllocator.nodesSynced = alwaysReady + rangeAllocator.nodesSynced = test.AlwaysReady rangeAllocator.recorder = testutil.NewFakeRecorder() go allocator.Run(wait.NeverStop) @@ -813,7 +785,7 @@ func TestReleaseCIDRSuccess(t *testing.T) { if err != nil { t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) } - if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { + if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) } } else { @@ -841,7 +813,7 @@ func TestReleaseCIDRSuccess(t *testing.T) { if err = allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil { t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) } - if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { + if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) } diff --git a/pkg/controller/nodeipam/ipam/test/utils.go b/pkg/controller/nodeipam/ipam/test/utils.go index 42242e1899b..2586484e975 100644 --- a/pkg/controller/nodeipam/ipam/test/utils.go +++ b/pkg/controller/nodeipam/ipam/test/utils.go @@ -18,10 +18,21 @@ package test import ( "net" + "time" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/informers" + coreinformers "k8s.io/client-go/informers/core/v1" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/kubernetes/pkg/controller" + "k8s.io/kubernetes/pkg/controller/testutil" netutils "k8s.io/utils/net" ) +const NodePollInterval = 10 * time.Millisecond + +var AlwaysReady = func() bool { return true } + // MustParseCIDR returns the CIDR range parsed from s or panics if the string // cannot be parsed. 
func MustParseCIDR(s string) *net.IPNet { @@ -31,3 +42,25 @@ func MustParseCIDR(s string) *net.IPNet { } return ret } + +// FakeNodeInformer creates a fakeNodeInformer using the provided fakeNodeHandler. +func FakeNodeInformer(fakeNodeHandler *testutil.FakeNodeHandler) coreinformers.NodeInformer { + fakeClient := &fake.Clientset{} + fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) + fakeNodeInformer := fakeInformerFactory.Core().V1().Nodes() + + for _, node := range fakeNodeHandler.Existing { + fakeNodeInformer.Informer().GetStore().Add(node) + } + + return fakeNodeInformer +} + +func WaitForUpdatedNodeWithTimeout(nodeHandler *testutil.FakeNodeHandler, number int, timeout time.Duration) error { + return wait.Poll(NodePollInterval, timeout, func() (bool, error) { + if len(nodeHandler.GetUpdatedNodesCopy()) >= number { + return true, nil + } + return false, nil + }) +} diff --git a/pkg/controller/nodeipam/node_ipam_controller.go b/pkg/controller/nodeipam/node_ipam_controller.go index 3a3c8ce1919..2cae14bf532 100644 --- a/pkg/controller/nodeipam/node_ipam_controller.go +++ b/pkg/controller/nodeipam/node_ipam_controller.go @@ -20,20 +20,18 @@ import ( "net" "time" - "k8s.io/klog/v2" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - + coreinformers "k8s.io/client-go/informers/core/v1" + networkinginformers "k8s.io/client-go/informers/networking/v1alpha1" + clientset "k8s.io/client-go/kubernetes" v1core "k8s.io/client-go/kubernetes/typed/core/v1" + corelisters "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" - - coreinformers "k8s.io/client-go/informers/core/v1" - clientset "k8s.io/client-go/kubernetes" - corelisters "k8s.io/client-go/listers/core/v1" cloudprovider "k8s.io/cloud-provider" controllersmetrics "k8s.io/component-base/metrics/prometheus/controllers" "k8s.io/component-base/metrics/prometheus/ratelimiter" + "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/controller/nodeipam/ipam" ) @@ -74,6 +72,7 @@ type Controller struct { // currently, this should be handled as a fatal error. 
func NewNodeIpamController( nodeInformer coreinformers.NodeInformer, + clusterCIDRInformer networkinginformers.ClusterCIDRInformer, cloud cloudprovider.Interface, kubeClient clientset.Interface, clusterCIDRs []*net.IPNet, @@ -136,7 +135,7 @@ func NewNodeIpamController( NodeCIDRMaskSizes: nodeCIDRMaskSizes, } - ic.cidrAllocator, err = ipam.New(kubeClient, cloud, nodeInformer, ic.allocatorType, allocatorParams) + ic.cidrAllocator, err = ipam.New(kubeClient, cloud, nodeInformer, clusterCIDRInformer, ic.allocatorType, allocatorParams) if err != nil { return nil, err } diff --git a/pkg/controller/nodeipam/node_ipam_controller_test.go b/pkg/controller/nodeipam/node_ipam_controller_test.go index 48e850b9e78..ad4b433789f 100644 --- a/pkg/controller/nodeipam/node_ipam_controller_test.go +++ b/pkg/controller/nodeipam/node_ipam_controller_test.go @@ -48,6 +48,7 @@ func newTestNodeIpamController(clusterCIDR []*net.IPNet, serviceCIDR *net.IPNet, fakeClient := &fake.Clientset{} fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) fakeNodeInformer := fakeInformerFactory.Core().V1().Nodes() + fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs() for _, node := range fakeNodeHandler.Existing { fakeNodeInformer.Informer().GetStore().Add(node) @@ -55,7 +56,7 @@ func newTestNodeIpamController(clusterCIDR []*net.IPNet, serviceCIDR *net.IPNet, fakeGCE := gce.NewFakeGCECloud(gce.DefaultTestClusterValues()) return NewNodeIpamController( - fakeNodeInformer, fakeGCE, clientSet, + fakeNodeInformer, fakeClusterCIDRInformer, fakeGCE, clientSet, clusterCIDR, serviceCIDR, secondaryServiceCIDR, nodeCIDRMaskSizes, allocatorType, ) } @@ -78,6 +79,9 @@ func TestNewNodeIpamControllerWithCIDRMasks(t *testing.T) { {"valid_range_allocator_dualstack", "10.0.0.0/21,2000::/10", "10.1.0.0/21", emptyServiceCIDR, []int{24, 98}, ipam.RangeAllocatorType, false}, {"valid_range_allocator_dualstack_dualstackservice", "10.0.0.0/21,2000::/10", "10.1.0.0/21", "3000::/10", []int{24, 98}, ipam.RangeAllocatorType, false}, + {"valid_multi_cidr_range_allocator", "10.0.0.0/21", "10.1.0.0/21", emptyServiceCIDR, []int{24}, ipam.MultiCIDRRangeAllocatorType, false}, + {"valid_multi_cidr_range_allocator_dualstack", "10.0.0.0/21,2000::/10", "10.1.0.0/21", emptyServiceCIDR, []int{24, 98}, ipam.MultiCIDRRangeAllocatorType, false}, + {"valid_cloud_allocator", "10.0.0.0/21", "10.1.0.0/21", emptyServiceCIDR, []int{24}, ipam.CloudAllocatorType, false}, {"valid_ipam_from_cluster", "10.0.0.0/21", "10.1.0.0/21", emptyServiceCIDR, []int{24}, ipam.IPAMFromClusterAllocatorType, false}, {"valid_ipam_from_cloud", "10.0.0.0/21", "10.1.0.0/21", emptyServiceCIDR, []int{24}, ipam.IPAMFromCloudAllocatorType, false}, diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 5680afd02f8..24bdc1c1f6c 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -569,6 +569,13 @@ const ( // Enables the usage of different protocols in the same Service with type=LoadBalancer MixedProtocolLBService featuregate.Feature = "MixedProtocolLBService" + // owner: @sarveshr7 + // kep: http://kep.k8s.io/2593 + // alpha: v1.25 + // + // Enables the MultiCIDR Range allocator. 
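+	// When enabled, the node IPAM controller allocates Pod CIDRs to nodes from
+	// ClusterCIDR (networking.k8s.io/v1alpha1) objects rather than only from the
+	// cluster CIDR ranges passed to the controller at startup.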
+ MultiCIDRRangeAllocator featuregate.Feature = "MultiCIDRRangeAllocator" + // owner: @rikatz // kep: http://kep.k8s.io/2079 // alpha: v1.21 @@ -997,6 +1004,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS MixedProtocolLBService: {Default: true, PreRelease: featuregate.Beta}, + MultiCIDRRangeAllocator: {Default: false, PreRelease: featuregate.Alpha}, + NetworkPolicyEndPort: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.27 NetworkPolicyStatus: {Default: false, PreRelease: featuregate.Alpha}, diff --git a/test/integration/ipamperf/ipam_test.go b/test/integration/ipamperf/ipam_test.go index 90e931358b5..a0f35627978 100644 --- a/test/integration/ipamperf/ipam_test.go +++ b/test/integration/ipamperf/ipam_test.go @@ -50,8 +50,10 @@ func setupAllocator(kubeConfig *restclient.Config, config *Config, clusterCIDR, sharedInformer := informers.NewSharedInformerFactory(clientSet, 1*time.Hour) ipamController, err := nodeipam.NewNodeIpamController( - sharedInformer.Core().V1().Nodes(), config.Cloud, clientSet, - []*net.IPNet{clusterCIDR}, serviceCIDR, nil, []int{subnetMaskSize}, config.AllocatorType, + sharedInformer.Core().V1().Nodes(), + sharedInformer.Networking().V1alpha1().ClusterCIDRs(), + config.Cloud, clientSet, []*net.IPNet{clusterCIDR}, serviceCIDR, nil, + []int{subnetMaskSize}, config.AllocatorType, ) if err != nil { return nil, shutdownFunc, err