diff --git a/cmd/cloud-controller-manager/.import-restrictions b/cmd/cloud-controller-manager/.import-restrictions index 151e8744804..695312b7312 100644 --- a/cmd/cloud-controller-manager/.import-restrictions +++ b/cmd/cloud-controller-manager/.import-restrictions @@ -39,4 +39,5 @@ rules: - k8s.io/kubernetes/pkg/util/taints - k8s.io/kubernetes/pkg/proxy/util - k8s.io/kubernetes/pkg/proxy/util/testing + - k8s.io/kubernetes/pkg/util/slice - k8s.io/kubernetes/pkg/util/sysctl \ No newline at end of file diff --git a/cmd/cloud-controller-manager/nodeipamcontroller.go b/cmd/cloud-controller-manager/nodeipamcontroller.go index b03a9cd4a09..bcd6211342f 100644 --- a/cmd/cloud-controller-manager/nodeipamcontroller.go +++ b/cmd/cloud-controller-manager/nodeipamcontroller.go @@ -26,6 +26,8 @@ import ( "net" "strings" + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/client-go/informers/networking/v1alpha1" cloudprovider "k8s.io/cloud-provider" "k8s.io/cloud-provider/app" cloudcontrollerconfig "k8s.io/cloud-provider/app/config" @@ -36,6 +38,7 @@ import ( nodeipamcontroller "k8s.io/kubernetes/pkg/controller/nodeipam" nodeipamconfig "k8s.io/kubernetes/pkg/controller/nodeipam/config" "k8s.io/kubernetes/pkg/controller/nodeipam/ipam" + "k8s.io/kubernetes/pkg/features" netutils "k8s.io/utils/net" ) @@ -120,8 +123,14 @@ func startNodeIpamController(initContext app.ControllerInitContext, ccmConfig *c return nil, false, err } + var clusterCIDRInformer v1alpha1.ClusterCIDRInformer + if utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) { + clusterCIDRInformer = ctx.InformerFactory.Networking().V1alpha1().ClusterCIDRs() + } + nodeIpamController, err := nodeipamcontroller.NewNodeIpamController( ctx.InformerFactory.Core().V1().Nodes(), + clusterCIDRInformer, cloud, ctx.ClientBuilder.ClientOrDie(initContext.ClientName), clusterCIDRs, diff --git a/cmd/kube-controller-manager/app/core.go b/cmd/kube-controller-manager/app/core.go index 991b29a4347..336adf82c4a 100644 --- a/cmd/kube-controller-manager/app/core.go +++ b/cmd/kube-controller-manager/app/core.go @@ -27,7 +27,9 @@ import ( "strings" "time" + "k8s.io/client-go/informers/networking/v1alpha1" "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/features" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime/schema" @@ -153,8 +155,14 @@ func startNodeIpamController(ctx context.Context, controllerContext ControllerCo return nil, false, err } + var clusterCIDRInformer v1alpha1.ClusterCIDRInformer + if utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) { + clusterCIDRInformer = controllerContext.InformerFactory.Networking().V1alpha1().ClusterCIDRs() + } + nodeIpamController, err := nodeipamcontroller.NewNodeIpamController( controllerContext.InformerFactory.Core().V1().Nodes(), + clusterCIDRInformer, controllerContext.Cloud, controllerContext.ClientBuilder.ClientOrDie("node-controller"), clusterCIDRs, diff --git a/pkg/controller/nodeipam/ipam/cidr_allocator.go b/pkg/controller/nodeipam/ipam/cidr_allocator.go index 543a7797f13..4ca058eefcf 100644 --- a/pkg/controller/nodeipam/ipam/cidr_allocator.go +++ b/pkg/controller/nodeipam/ipam/cidr_allocator.go @@ -22,16 +22,18 @@ import ( "net" "time" - "k8s.io/klog/v2" - "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/wait" + utilfeature "k8s.io/apiserver/pkg/util/feature" informers "k8s.io/client-go/informers/core/v1" + networkinginformers 
"k8s.io/client-go/informers/networking/v1alpha1" clientset "k8s.io/client-go/kubernetes" cloudprovider "k8s.io/cloud-provider" + "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/features" ) // CIDRAllocatorType is the type of the allocator to use. @@ -41,6 +43,9 @@ const ( // RangeAllocatorType is the allocator that uses an internal CIDR // range allocator to do node CIDR range allocations. RangeAllocatorType CIDRAllocatorType = "RangeAllocator" + // MultiCIDRRangeAllocatorType is the allocator that uses an internal CIDR + // range allocator to do node CIDR range allocations. + MultiCIDRRangeAllocatorType CIDRAllocatorType = "MultiCIDRRangeAllocator" // CloudAllocatorType is the allocator that uses cloud platform // support to do node CIDR range allocations. CloudAllocatorType CIDRAllocatorType = "CloudAllocator" @@ -87,7 +92,7 @@ type CIDRAllocator interface { // CIDR if it doesn't currently have one or mark the CIDR as used if // the node already have one. AllocateOrOccupyCIDR(node *v1.Node) error - // ReleaseCIDR releases the CIDR of the removed node + // ReleaseCIDR releases the CIDR of the removed node. ReleaseCIDR(node *v1.Node) error // Run starts all the working logic of the allocator. Run(stopCh <-chan struct{}) @@ -96,18 +101,25 @@ type CIDRAllocator interface { // CIDRAllocatorParams is parameters that's required for creating new // cidr range allocator. type CIDRAllocatorParams struct { - // ClusterCIDRs is list of cluster cidrs + // ClusterCIDRs is list of cluster cidrs. ClusterCIDRs []*net.IPNet - // ServiceCIDR is primary service cidr for cluster + // ServiceCIDR is primary service cidr for cluster. ServiceCIDR *net.IPNet - // SecondaryServiceCIDR is secondary service cidr for cluster + // SecondaryServiceCIDR is secondary service cidr for cluster. SecondaryServiceCIDR *net.IPNet - // NodeCIDRMaskSizes is list of node cidr mask sizes + // NodeCIDRMaskSizes is list of node cidr mask sizes. NodeCIDRMaskSizes []int } +// CIDRs are reserved, then node resource is patched with them. +// nodeReservedCIDRs holds the reservation info for a node. +type nodeReservedCIDRs struct { + allocatedCIDRs []*net.IPNet + nodeName string +} + // New creates a new CIDR range allocator. 
-func New(kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer, allocatorType CIDRAllocatorType, allocatorParams CIDRAllocatorParams) (CIDRAllocator, error) { +func New(kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer, clusterCIDRInformer networkinginformers.ClusterCIDRInformer, allocatorType CIDRAllocatorType, allocatorParams CIDRAllocatorParams) (CIDRAllocator, error) { nodeList, err := listNodes(kubeClient) if err != nil { return nil, err @@ -116,6 +128,12 @@ func New(kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInfo switch allocatorType { case RangeAllocatorType: return NewCIDRRangeAllocator(kubeClient, nodeInformer, allocatorParams, nodeList) + case MultiCIDRRangeAllocatorType: + if !utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) { + return nil, fmt.Errorf("invalid CIDR allocator type: %v, feature gate %v must be enabled", allocatorType, features.MultiCIDRRangeAllocator) + } + return NewMultiCIDRRangeAllocator(kubeClient, nodeInformer, clusterCIDRInformer, allocatorParams, nodeList, nil) + case CloudAllocatorType: return NewCloudCIDRAllocator(kubeClient, cloud, nodeInformer) default: @@ -144,3 +162,12 @@ func listNodes(kubeClient clientset.Interface) (*v1.NodeList, error) { } return nodeList, nil } + +// ipnetToStringList converts a slice of net.IPNet into a list of CIDR in string format +func ipnetToStringList(inCIDRs []*net.IPNet) []string { + outCIDRs := make([]string, len(inCIDRs)) + for idx, inCIDR := range inCIDRs { + outCIDRs[idx] = inCIDR.String() + } + return outCIDRs +} diff --git a/pkg/controller/nodeipam/ipam/multi_cidr_range_allocator.go b/pkg/controller/nodeipam/ipam/multi_cidr_range_allocator.go new file mode 100644 index 00000000000..5fb96887df2 --- /dev/null +++ b/pkg/controller/nodeipam/ipam/multi_cidr_range_allocator.go @@ -0,0 +1,1205 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package ipam + +import ( + "container/heap" + "context" + "errors" + "fmt" + "math" + "math/rand" + "net" + "sync" + "time" + + "k8s.io/api/core/v1" + networkingv1alpha1 "k8s.io/api/networking/v1alpha1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" + informers "k8s.io/client-go/informers/core/v1" + networkinginformers "k8s.io/client-go/informers/networking/v1alpha1" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" + v1core "k8s.io/client-go/kubernetes/typed/core/v1" + corelisters "k8s.io/client-go/listers/core/v1" + networkinglisters "k8s.io/client-go/listers/networking/v1alpha1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/tools/record" + "k8s.io/client-go/util/workqueue" + "k8s.io/component-base/metrics/prometheus/ratelimiter" + nodeutil "k8s.io/component-helpers/node/util" + "k8s.io/klog/v2" + v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" + cidrset "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/multicidrset" + controllerutil "k8s.io/kubernetes/pkg/controller/util/node" + "k8s.io/kubernetes/pkg/util/slice" + netutil "k8s.io/utils/net" +) + +const ( + defaultClusterCIDRKey = "kubernetes.io/clusterCIDR" + defaultClusterCIDRValue = "default" + defaultClusterCIDRName = "default-cluster-cidr" + defaultClusterCIDRAPIVersion = "networking.k8s.io/v1alpha1" + clusterCIDRFinalizer = "networking.k8s.io/cluster-cidr-finalizer" + ipv4MaxCIDRMask = 32 + ipv6MaxCIDRMask = 128 + minPerNodeHostBits = 4 +) + +// CIDRs are reserved, then node resource is patched with them. +// multiCIDRNodeReservedCIDRs holds the reservation info for a node. +type multiCIDRNodeReservedCIDRs struct { + nodeReservedCIDRs + clusterCIDR *cidrset.ClusterCIDR +} + +// multiCIDRNodeProcessingInfo tracks information related to current nodes in processing +type multiCIDRNodeProcessingInfo struct { + retries int +} + +type multiCIDRRangeAllocator struct { + client clientset.Interface + // nodeLister is able to list/get nodes and is populated by the shared informer passed to controller. + nodeLister corelisters.NodeLister + // nodesSynced returns true if the node shared informer has been synced at least once. + nodesSynced cache.InformerSynced + // clusterCIDRLister is able to list/get clustercidrs and is populated by the shared informer passed to controller. + clusterCIDRLister networkinglisters.ClusterCIDRLister + // clusterCIDRSynced returns true if the clustercidr shared informer has been synced at least once. + clusterCIDRSynced cache.InformerSynced + // Channel that is used to pass updating Nodes and their reserved CIDRs to the background. + // This increases a throughput of CIDR assignment by not blocking on long operations. + nodeCIDRUpdateChannel chan multiCIDRNodeReservedCIDRs + recorder record.EventRecorder + // queue is where incoming work is placed to de-dup and to allow "easy" + // rate limited requeues on errors + queue workqueue.RateLimitingInterface + + // lock guards nodesInProcessing and cidrMap to avoid races in CIDR allocation. + lock *sync.Mutex + // nodesInProcessing is a set of nodes that are currently being processed. + nodesInProcessing map[string]*multiCIDRNodeProcessingInfo + // cidrMap maps ClusterCIDR labels to internal ClusterCIDR objects. 
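+	// The map key is the marshalled NodeSelector of the ClusterCIDR (or the
+	// default selector when none is set), so ClusterCIDRs sharing a selector
+	// are grouped under the same entry.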
+ cidrMap map[string][]*cidrset.ClusterCIDR +} + +// NewMultiCIDRRangeAllocator returns a CIDRAllocator to allocate CIDRs for node (one for each ip family). +// Caller must always pass in a list of existing nodes to the new allocator. +// NodeList is only nil in testing. +func NewMultiCIDRRangeAllocator( + client clientset.Interface, + nodeInformer informers.NodeInformer, + clusterCIDRInformer networkinginformers.ClusterCIDRInformer, + allocatorParams CIDRAllocatorParams, + nodeList *v1.NodeList, + testCIDRMap map[string][]*cidrset.ClusterCIDR, +) (CIDRAllocator, error) { + if client == nil { + klog.Fatalf("client is nil") + } + + eventBroadcaster := record.NewBroadcaster() + eventSource := v1.EventSource{ + Component: "multiCIDRRangeAllocator", + } + recorder := eventBroadcaster.NewRecorder(scheme.Scheme, eventSource) + eventBroadcaster.StartStructuredLogging(0) + klog.V(0).Infof("Started sending events to API Server. (EventSource = %v)", eventSource) + + eventBroadcaster.StartRecordingToSink( + &v1core.EventSinkImpl{ + Interface: client.CoreV1().Events(""), + }) + + if client.CoreV1().RESTClient().GetRateLimiter() != nil { + ratelimiter.RegisterMetricAndTrackRateLimiterUsage("multi_cidr_range_allocator", client.CoreV1().RESTClient().GetRateLimiter()) + } + + ra := &multiCIDRRangeAllocator{ + client: client, + nodeLister: nodeInformer.Lister(), + nodesSynced: nodeInformer.Informer().HasSynced, + clusterCIDRLister: clusterCIDRInformer.Lister(), + clusterCIDRSynced: clusterCIDRInformer.Informer().HasSynced, + nodeCIDRUpdateChannel: make(chan multiCIDRNodeReservedCIDRs, cidrUpdateQueueSize), + recorder: recorder, + queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "multi_cidr_range_allocator"), + lock: &sync.Mutex{}, + nodesInProcessing: map[string]*multiCIDRNodeProcessingInfo{}, + cidrMap: make(map[string][]*cidrset.ClusterCIDR, 0), + } + + // testCIDRMap is only set for testing purposes. + if len(testCIDRMap) > 0 { + ra.cidrMap = testCIDRMap + klog.Warningf("testCIDRMap should only be set for testing purposes, if this is seen in production logs, it might be a misconfiguration or a bug.") + } + + ccList, err := listClusterCIDRs(client) + if err != nil { + return nil, err + } + + if ccList == nil { + ccList = &networkingv1alpha1.ClusterCIDRList{} + } + createDefaultClusterCIDR(ccList, allocatorParams) + + // Regenerate the cidrMaps from the existing ClusterCIDRs. + for _, clusterCIDR := range ccList.Items { + klog.Infof("Regenerating existing ClusterCIDR: %v", clusterCIDR) + // Create an event for invalid ClusterCIDRs, do not crash on failures. + if err := ra.reconcileBootstrap(&clusterCIDR); err != nil { + klog.Errorf("Error while regenerating existing ClusterCIDR: %v", err) + ra.recorder.Event(&clusterCIDR, "Warning", "InvalidClusterCIDR encountered while regenerating ClusterCIDR during bootstrap.", err.Error()) + } + } + + clusterCIDRInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: createClusterCIDRHandler(ra.reconcileCreate), + DeleteFunc: createClusterCIDRHandler(ra.reconcileDelete), + }) + + if allocatorParams.ServiceCIDR != nil { + ra.filterOutServiceRange(allocatorParams.ServiceCIDR) + } else { + klog.V(0).Info("No Service CIDR provided. Skipping filtering out service addresses.") + } + + if allocatorParams.SecondaryServiceCIDR != nil { + ra.filterOutServiceRange(allocatorParams.SecondaryServiceCIDR) + } else { + klog.V(0).Info("No Secondary Service CIDR provided. 
Skipping filtering out secondary service addresses.") + } + + if nodeList != nil { + for _, node := range nodeList.Items { + if len(node.Spec.PodCIDRs) == 0 { + klog.V(4).Infof("Node %v has no CIDR, ignoring", node.Name) + continue + } + klog.V(0).Infof("Node %v has CIDR %s, occupying it in CIDR map", node.Name, node.Spec.PodCIDRs) + if err := ra.occupyCIDRs(&node); err != nil { + // This will happen if: + // 1. We find garbage in the podCIDRs field. Retrying is useless. + // 2. CIDR out of range: This means ClusterCIDR is not yet created + // This error will keep crashing controller-manager until the + // appropriate ClusterCIDR has been created + return nil, err + } + } + } + + nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: controllerutil.CreateAddNodeHandler(ra.AllocateOrOccupyCIDR), + UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error { + // If the PodCIDRs list is not empty we either: + // - already processed a Node that already had CIDRs after NC restarted + // (cidr is marked as used), + // - already processed a Node successfully and allocated CIDRs for it + // (cidr is marked as used), + // - already processed a Node but we saw a "timeout" response and + // request eventually got through in this case we haven't released + // the allocated CIDRs (cidr is still marked as used). + // There's a possible error here: + // - NC sees a new Node and assigns CIDRs X,Y.. to it, + // - Update Node call fails with a timeout, + // - Node is updated by some other component, NC sees an update and + // assigns CIDRs A,B.. to the Node, + // - Both CIDR X,Y.. and CIDR A,B.. are marked as used in the local cache, + // even though Node sees only CIDR A,B.. + // The problem here is that in in-memory cache we see CIDR X,Y.. as marked, + // which prevents it from being assigned to any new node. The cluster + // state is correct. + // Restart of NC fixes the issue. + if len(newNode.Spec.PodCIDRs) == 0 { + return ra.AllocateOrOccupyCIDR(newNode) + } + return nil + }), + DeleteFunc: controllerutil.CreateDeleteNodeHandler(ra.ReleaseCIDR), + }) + + return ra, nil +} + +func (r *multiCIDRRangeAllocator) Run(stopCh <-chan struct{}) { + defer utilruntime.HandleCrash() + + klog.Infof("Starting Multi CIDR Range allocator") + defer klog.Infof("Shutting down Multi CIDR Range allocator") + + if !cache.WaitForNamedCacheSync("multi_cidr_range_allocator", stopCh, r.nodesSynced, r.clusterCIDRSynced) { + return + } + + // raWaitGroup is used to wait for the RangeAllocator to finish the goroutines. + var raWaitGroup sync.WaitGroup + + for i := 0; i < cidrUpdateWorkers; i++ { + raWaitGroup.Add(1) + go func() { + defer raWaitGroup.Done() + r.worker(stopCh) + }() + } + + raWaitGroup.Wait() + + <-stopCh +} + +func (r *multiCIDRRangeAllocator) worker(stopChan <-chan struct{}) { + for { + select { + case workItem, ok := <-r.nodeCIDRUpdateChannel: + if !ok { + klog.Error("Channel nodeCIDRUpdateChannel was unexpectedly closed") + return + } + r.lock.Lock() + if err := r.updateCIDRsAllocation(workItem); err == nil { + klog.V(3).Infof("Updated CIDR for %q", workItem.nodeName) + } else { + klog.Errorf("Error updating CIDR for %q: %v", workItem.nodeName, err) + if canRetry, timeout := r.retryParams(workItem.nodeName); canRetry { + klog.V(2).Infof("Retrying update for %q after %v", workItem.nodeName, timeout) + time.AfterFunc(timeout, func() { + // Requeue the failed node for update again. 
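+						// retryParams above computes the delay, doubling the
+						// timeout on every retry (with jitter) up to
+						// maxUpdateRetryTimeout.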
+ r.nodeCIDRUpdateChannel <- workItem + }) + continue + } + klog.Errorf("Exceeded retry count for %q, dropping from queue", workItem.nodeName) + } + r.removeNodeFromProcessing(workItem.nodeName) + r.lock.Unlock() + case <-stopChan: + klog.Infof("MultiCIDRRangeAllocator worker is stopping.") + return + } + } +} + +// createClusterCIDRHandler creates clusterCIDR handler. +func createClusterCIDRHandler(f func(ccc *networkingv1alpha1.ClusterCIDR) error) func(obj interface{}) { + return func(originalObj interface{}) { + ccc := originalObj.(*networkingv1alpha1.ClusterCIDR) + if err := f(ccc); err != nil { + utilruntime.HandleError(fmt.Errorf("error while processing ClusterCIDR Add/Delete: %w", err)) + } + } +} + +// needToAddFinalizer checks if a finalizer should be added to the object. +func needToAddFinalizer(obj metav1.Object, finalizer string) bool { + return obj.GetDeletionTimestamp() == nil && !slice.ContainsString(obj.GetFinalizers(), + finalizer, nil) +} + +func (r *multiCIDRRangeAllocator) syncClusterCIDR(key string) error { + startTime := time.Now() + defer func() { + klog.V(4).Infof("Finished syncing clusterCIDR request %q (%v)", key, time.Since(startTime)) + }() + + clusterCIDR, err := r.clusterCIDRLister.Get(key) + if apierrors.IsNotFound(err) { + klog.V(3).Infof("clusterCIDR has been deleted: %v", key) + return nil + } + + if err != nil { + return err + } + + // Check the DeletionTimestamp to determine if object is under deletion. + if !clusterCIDR.DeletionTimestamp.IsZero() { + return r.reconcileDelete(clusterCIDR) + } + return r.reconcileCreate(clusterCIDR) +} + +func (r *multiCIDRRangeAllocator) insertNodeToProcessing(nodeName string) bool { + if _, found := r.nodesInProcessing[nodeName]; found { + return false + } + r.nodesInProcessing[nodeName] = &multiCIDRNodeProcessingInfo{} + return true +} + +func (r *multiCIDRRangeAllocator) removeNodeFromProcessing(nodeName string) { + klog.Infof("Removing node %q from processing", nodeName) + delete(r.nodesInProcessing, nodeName) +} + +func (r *multiCIDRRangeAllocator) retryParams(nodeName string) (bool, time.Duration) { + r.lock.Lock() + defer r.lock.Unlock() + + entry, ok := r.nodesInProcessing[nodeName] + if !ok { + klog.Errorf("Cannot get retryParams for %q as entry does not exist", nodeName) + return false, 0 + } + + count := entry.retries + 1 + if count > updateMaxRetries { + return false, 0 + } + r.nodesInProcessing[nodeName].retries = count + + return true, multiCIDRNodeUpdateRetryTimeout(count) +} + +func multiCIDRNodeUpdateRetryTimeout(count int) time.Duration { + timeout := updateRetryTimeout + for i := 0; i < count && timeout < maxUpdateRetryTimeout; i++ { + timeout *= 2 + } + if timeout > maxUpdateRetryTimeout { + timeout = maxUpdateRetryTimeout + } + return time.Duration(timeout.Nanoseconds()/2 + rand.Int63n(timeout.Nanoseconds())) +} + +// occupyCIDRs marks node.PodCIDRs[...] as used in allocator's tracked cidrSet. 
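+// It walks the ClusterCIDRs matching the node in priority order and records
+// the node as associated with the first ClusterCIDR that can occupy every
+// CIDR in node.Spec.PodCIDRs; if none can, an error is returned.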
+func (r *multiCIDRRangeAllocator) occupyCIDRs(node *v1.Node) error { + + err := func(node *v1.Node) error { + + if len(node.Spec.PodCIDRs) == 0 { + return nil + } + + clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node) + if err != nil { + return err + } + + for _, clusterCIDR := range clusterCIDRList { + occupiedCount := 0 + + for _, cidr := range node.Spec.PodCIDRs { + _, podCIDR, err := netutil.ParseCIDRSloppy(cidr) + if err != nil { + return fmt.Errorf("failed to parse CIDR %s on Node %v: %w", cidr, node.Name, err) + } + + klog.Infof("occupy CIDR %s for node: %s", cidr, node.Name) + + if err := r.Occupy(clusterCIDR, podCIDR); err != nil { + klog.V(3).Infof("Could not occupy cidr: %v, trying next range: %w", node.Spec.PodCIDRs, err) + break + } + + occupiedCount++ + } + + // Mark CIDRs as occupied only if the CCC is able to occupy all the node CIDRs. + if occupiedCount == len(node.Spec.PodCIDRs) { + clusterCIDR.AssociatedNodes[node.Name] = true + return nil + } + } + + return fmt.Errorf("could not occupy cidrs: %v, No matching ClusterCIDRs found", node.Spec.PodCIDRs) + }(node) + + r.removeNodeFromProcessing(node.Name) + return err +} + +// associatedCIDRSet returns the CIDRSet, based on the ip family of the CIDR. +func (r *multiCIDRRangeAllocator) associatedCIDRSet(clusterCIDR *cidrset.ClusterCIDR, cidr *net.IPNet) (*cidrset.MultiCIDRSet, error) { + switch { + case netutil.IsIPv4CIDR(cidr): + return clusterCIDR.IPv4CIDRSet, nil + case netutil.IsIPv6CIDR(cidr): + return clusterCIDR.IPv6CIDRSet, nil + default: + return nil, fmt.Errorf("invalid cidr: %v", cidr) + } +} + +// Occupy marks the CIDR as occupied in the allocatedCIDRMap of the cidrSet. +func (r *multiCIDRRangeAllocator) Occupy(clusterCIDR *cidrset.ClusterCIDR, cidr *net.IPNet) error { + currCIDRSet, err := r.associatedCIDRSet(clusterCIDR, cidr) + if err != nil { + return err + } + + if err := currCIDRSet.Occupy(cidr); err != nil { + return fmt.Errorf("unable to occupy cidr %v in cidrSet", cidr) + } + + return nil +} + +// Release marks the CIDR as free in the cidrSet used bitmap, +// Also removes the CIDR from the allocatedCIDRSet. +func (r *multiCIDRRangeAllocator) Release(clusterCIDR *cidrset.ClusterCIDR, cidr *net.IPNet) error { + currCIDRSet, err := r.associatedCIDRSet(clusterCIDR, cidr) + if err != nil { + return err + } + + if err := currCIDRSet.Release(cidr); err != nil { + klog.Infof("Unable to release cidr %v in cidrSet", cidr) + return err + } + + return nil +} + +// AllocateOrOccupyCIDR allocates a CIDR to the node if the node doesn't have a +// CIDR already allocated, occupies the CIDR and marks as used if the node +// already has a PodCIDR assigned. +// WARNING: If you're adding any return calls or defer any more work from this +// function you have to make sure to update nodesInProcessing properly with the +// disposition of the node when the work is done. 
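+// Note: the reservation is committed synchronously via updateCIDRsAllocation;
+// nodeCIDRUpdateChannel is only used by the worker to requeue failed updates
+// for retry.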
+func (r *multiCIDRRangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error { + r.lock.Lock() + defer r.lock.Unlock() + + if node == nil { + return nil + } + + if !r.insertNodeToProcessing(node.Name) { + klog.Infof("Node %v is already in a process of CIDR assignment.", node.Name) + return nil + } + + if len(node.Spec.PodCIDRs) > 0 { + return r.occupyCIDRs(node) + } + + cidrs, clusterCIDR, err := r.prioritizedCIDRs(node) + if err != nil { + r.removeNodeFromProcessing(node.Name) + controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRNotAvailable") + return fmt.Errorf("failed to get cidrs for node %s", node.Name) + } + + if len(cidrs) == 0 { + r.removeNodeFromProcessing(node.Name) + controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRNotAvailable") + return fmt.Errorf("no cidrSets with matching labels found for node %s", node.Name) + } + + // allocate and queue the assignment. + allocated := multiCIDRNodeReservedCIDRs{ + nodeReservedCIDRs: nodeReservedCIDRs{ + nodeName: node.Name, + allocatedCIDRs: cidrs, + }, + clusterCIDR: clusterCIDR, + } + + return r.updateCIDRsAllocation(allocated) +} + +// ReleaseCIDR marks node.podCIDRs[...] as unused in our tracked cidrSets. +func (r *multiCIDRRangeAllocator) ReleaseCIDR(node *v1.Node) error { + r.lock.Lock() + defer r.lock.Unlock() + + if node == nil || len(node.Spec.PodCIDRs) == 0 { + return nil + } + + clusterCIDR, err := r.allocatedClusterCIDR(node) + if err != nil { + return err + } + + for _, cidr := range node.Spec.PodCIDRs { + _, podCIDR, err := netutil.ParseCIDRSloppy(cidr) + if err != nil { + return fmt.Errorf("failed to parse CIDR %q on Node %q: %w", cidr, node.Name, err) + } + + klog.Infof("release CIDR %s for node: %s", cidr, node.Name) + if err := r.Release(clusterCIDR, podCIDR); err != nil { + return fmt.Errorf("failed to release cidr %q from clusterCIDR %q for node %q: %w", cidr, clusterCIDR.Name, node.Name, err) + } + } + + // Remove the node from the ClusterCIDR AssociatedNodes. + delete(clusterCIDR.AssociatedNodes, node.Name) + + return nil +} + +// Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used across all cidrs +// so that they won't be assignable. +func (r *multiCIDRRangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) { + // Checks if service CIDR has a nonempty intersection with cluster + // CIDR. It is the case if either clusterCIDR contains serviceCIDR with + // clusterCIDR's Mask applied (this means that clusterCIDR contains + // serviceCIDR) or vice versa (which means that serviceCIDR contains + // clusterCIDR). + for _, clusterCIDRList := range r.cidrMap { + for _, clusterCIDR := range clusterCIDRList { + if err := r.occupyServiceCIDR(clusterCIDR, serviceCIDR); err != nil { + klog.Errorf("unable to occupy service CIDR: %w", err) + } + } + } +} + +func (r *multiCIDRRangeAllocator) occupyServiceCIDR(clusterCIDR *cidrset.ClusterCIDR, serviceCIDR *net.IPNet) error { + + cidrSet, err := r.associatedCIDRSet(clusterCIDR, serviceCIDR) + if err != nil { + return err + } + + cidr := cidrSet.ClusterCIDR + + // No need to occupy as Service CIDR doesn't intersect with the current ClusterCIDR. 
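+	// The two Contains checks below cover both directions: the cluster CIDR
+	// containing the service CIDR, or the service CIDR containing the cluster
+	// CIDR.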
+ if !cidr.Contains(serviceCIDR.IP.Mask(cidr.Mask)) && !serviceCIDR.Contains(cidr.IP.Mask(serviceCIDR.Mask)) { + return nil + } + + if err := r.Occupy(clusterCIDR, serviceCIDR); err != nil { + return fmt.Errorf("error filtering out service cidr %v from cluster cidr %v: %w", cidr, serviceCIDR, err) + } + + return nil +} + +// updateCIDRsAllocation assigns CIDR to Node and sends an update to the API server. +func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(data multiCIDRNodeReservedCIDRs) error { + err := func(data multiCIDRNodeReservedCIDRs) error { + cidrsString := ipnetToStringList(data.allocatedCIDRs) + node, err := r.nodeLister.Get(data.nodeName) + if err != nil { + klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDRs: %v", data.nodeName, err) + return err + } + + // if cidr list matches the proposed, + // then we possibly updated this node + // and just failed to ack the success. + if len(node.Spec.PodCIDRs) == len(data.allocatedCIDRs) { + match := true + for idx, cidr := range cidrsString { + if node.Spec.PodCIDRs[idx] != cidr { + match = false + break + } + } + if match { + klog.V(4).Infof("Node %q already has allocated CIDR %q. It matches the proposed one.", node.Name, data.allocatedCIDRs) + return nil + } + } + + // node has cidrs allocated, release the reserved. + if len(node.Spec.PodCIDRs) != 0 { + klog.Errorf("Node %q already has a CIDR allocated %q. Releasing the new one.", node.Name, node.Spec.PodCIDRs) + for _, cidr := range data.allocatedCIDRs { + if err := r.Release(data.clusterCIDR, cidr); err != nil { + return fmt.Errorf("failed to release cidr %s from clusterCIDR %s for node: %s: %w", cidr, data.clusterCIDR.Name, node.Name, err) + } + } + return nil + } + + // If we reached here, it means that the node has no CIDR currently assigned. So we set it. + for i := 0; i < cidrUpdateRetries; i++ { + if err = nodeutil.PatchNodeCIDRs(r.client, types.NodeName(node.Name), cidrsString); err == nil { + data.clusterCIDR.AssociatedNodes[node.Name] = true + klog.Infof("Set node %q PodCIDR to %q", node.Name, cidrsString) + return nil + } + } + // failed release back to the pool. + klog.Errorf("Failed to update node %q PodCIDR to %q after %d attempts: %v", node.Name, cidrsString, cidrUpdateRetries, err) + controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed") + // We accept the fact that we may leak CIDRs here. This is safer than releasing + // them in case when we don't know if request went through. + // NodeController restart will return all falsely allocated CIDRs to the pool. + if !apierrors.IsServerTimeout(err) { + klog.Errorf("CIDR assignment for node %q failed: %v. Releasing allocated CIDR", node.Name, err) + for _, cidr := range data.allocatedCIDRs { + if err := r.Release(data.clusterCIDR, cidr); err != nil { + return fmt.Errorf("failed to release cidr %q from clusterCIDR %q for node: %q: %w", cidr, data.clusterCIDR.Name, node.Name, err) + } + } + } + return err + }(data) + + r.removeNodeFromProcessing(data.nodeName) + return err +} + +// defaultNodeSelector generates a label with defaultClusterCIDRKey as the key and +// defaultClusterCIDRValue as the value, it is an internal nodeSelector matching all +// nodes. Only used if no ClusterCIDR selects the node. 
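+// The marshalled form of this selector is also the cidrMap key under which
+// ClusterCIDRs without an explicit NodeSelector (including the default
+// ClusterCIDR built from --cluster-cidr) are stored.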
+func defaultNodeSelector() ([]byte, error) { + nodeSelector := &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: defaultClusterCIDRKey, + Operator: v1.NodeSelectorOpIn, + Values: []string{defaultClusterCIDRValue}, + }, + }, + }, + }, + } + + marshalledSelector, err := nodeSelector.Marshal() + if err != nil { + return nil, err + } + + return marshalledSelector, nil +} + +// prioritizedCIDRs returns a list of CIDRs to be allocated to the node. +// Returns 1 CIDR if single stack. +// Returns 2 CIDRs , 1 from each ip family if dual stack. +func (r *multiCIDRRangeAllocator) prioritizedCIDRs(node *v1.Node) ([]*net.IPNet, *cidrset.ClusterCIDR, error) { + clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node) + if err != nil { + return nil, nil, fmt.Errorf("unable to get a clusterCIDR for node %s: %w", node.Name, err) + } + + for _, clusterCIDR := range clusterCIDRList { + cidrs := make([]*net.IPNet, 0) + if clusterCIDR.IPv4CIDRSet != nil { + cidr, err := r.allocateCIDR(clusterCIDR, clusterCIDR.IPv4CIDRSet) + if err != nil { + klog.V(3).Infof("unable to allocate IPv4 CIDR, trying next range: %w", err) + continue + } + cidrs = append(cidrs, cidr) + } + + if clusterCIDR.IPv6CIDRSet != nil { + cidr, err := r.allocateCIDR(clusterCIDR, clusterCIDR.IPv6CIDRSet) + if err != nil { + klog.V(3).Infof("unable to allocate IPv6 CIDR, trying next range: %w", err) + continue + } + cidrs = append(cidrs, cidr) + } + + return cidrs, clusterCIDR, nil + } + return nil, nil, fmt.Errorf("unable to get a clusterCIDR for node %s, no available CIDRs", node.Name) +} + +func (r *multiCIDRRangeAllocator) allocateCIDR(clusterCIDR *cidrset.ClusterCIDR, cidrSet *cidrset.MultiCIDRSet) (*net.IPNet, error) { + + for evaluated := 0; evaluated < cidrSet.MaxCIDRs; evaluated++ { + candidate, lastEvaluated, err := cidrSet.NextCandidate() + if err != nil { + return nil, err + } + + evaluated += lastEvaluated + + if r.cidrInAllocatedList(candidate) { + continue + } + + // Deep Check. + if r.cidrOverlapWithAllocatedList(candidate) { + continue + } + + // Mark the CIDR as occupied in the map. + if err := r.Occupy(clusterCIDR, candidate); err != nil { + return nil, err + } + // Increment the evaluated count metric. + cidrSet.UpdateEvaluatedCount(evaluated) + return candidate, nil + } + return nil, &cidrset.CIDRRangeNoCIDRsRemainingErr{ + CIDR: cidrSet.Label, + } +} + +func (r *multiCIDRRangeAllocator) cidrInAllocatedList(cidr *net.IPNet) bool { + for _, clusterCIDRList := range r.cidrMap { + for _, clusterCIDR := range clusterCIDRList { + cidrSet, _ := r.associatedCIDRSet(clusterCIDR, cidr) + if cidrSet != nil { + if ok := cidrSet.AllocatedCIDRMap[cidr.String()]; ok { + return true + } + } + } + } + return false +} + +func (r *multiCIDRRangeAllocator) cidrOverlapWithAllocatedList(cidr *net.IPNet) bool { + for _, clusterCIDRList := range r.cidrMap { + for _, clusterCIDR := range clusterCIDRList { + cidrSet, _ := r.associatedCIDRSet(clusterCIDR, cidr) + if cidrSet != nil { + for allocated := range cidrSet.AllocatedCIDRMap { + _, allocatedCIDR, _ := netutil.ParseCIDRSloppy(allocated) + if cidr.Contains(allocatedCIDR.IP.Mask(cidr.Mask)) || allocatedCIDR.Contains(cidr.IP.Mask(allocatedCIDR.Mask)) { + return true + } + } + } + } + } + return false +} + +// allocatedClusterCIDR returns the ClusterCIDR from which the node CIDRs were allocated. 
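+// It relies on the AssociatedNodes bookkeeping maintained when CIDRs are
+// occupied or patched onto the node, so it only resolves nodes this allocator
+// has already processed.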
+func (r *multiCIDRRangeAllocator) allocatedClusterCIDR(node *v1.Node) (*cidrset.ClusterCIDR, error) { + clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node) + if err != nil { + return nil, fmt.Errorf("unable to get a clusterCIDR for node %s: %w", node.Name, err) + } + + for _, clusterCIDR := range clusterCIDRList { + if ok := clusterCIDR.AssociatedNodes[node.Name]; ok { + return clusterCIDR, nil + } + } + return nil, fmt.Errorf("no clusterCIDR found associated with node: %s", node.Name) +} + +// orderedMatchingClusterCIDRs returns a list of all the ClusterCIDRs matching the node labels. +// The list is ordered with the following priority, which act as tie-breakers. +// P0: ClusterCIDR with higher number of matching labels has the highest priority. +// P1: ClusterCIDR having cidrSet with fewer allocatable Pod CIDRs has higher priority. +// P2: ClusterCIDR with a PerNodeMaskSize having fewer IPs has higher priority. +// P3: ClusterCIDR having label with lower alphanumeric value has higher priority. +// P4: ClusterCIDR with a cidrSet having a smaller IP address value has a higher priority. +func (r *multiCIDRRangeAllocator) orderedMatchingClusterCIDRs(node *v1.Node) ([]*cidrset.ClusterCIDR, error) { + matchingCIDRs := make([]*cidrset.ClusterCIDR, 0) + pq := make(PriorityQueue, 0) + + for label, clusterCIDRList := range r.cidrMap { + labelsMatch, matchCnt, err := r.matchCIDRLabels(node, []byte(label)) + if err != nil { + return nil, err + } + + if !labelsMatch { + continue + } + + for _, clusterCIDR := range clusterCIDRList { + pqItem := &PriorityQueueItem{ + clusterCIDR: clusterCIDR, + labelMatchCount: matchCnt, + selectorString: label, + } + + // Only push the CIDRsets which are not marked for termination. + if !clusterCIDR.Terminating { + heap.Push(&pq, pqItem) + } + } + } + + // Remove the ClusterCIDRs from the PriorityQueue. + // They arrive in descending order of matchCnt, + // if matchCnt is equal it is ordered in ascending order of labels. + for pq.Len() > 0 { + pqItem := heap.Pop(&pq).(*PriorityQueueItem) + matchingCIDRs = append(matchingCIDRs, pqItem.clusterCIDR) + } + + // Append the catch all CIDR config. + defaultSelector, err := defaultNodeSelector() + if err != nil { + return nil, err + } + if clusterCIDRList, ok := r.cidrMap[string(defaultSelector)]; ok { + matchingCIDRs = append(matchingCIDRs, clusterCIDRList...) + } + return matchingCIDRs, nil +} + +// matchCIDRLabels Matches the Node labels to CIDR Configs. +// Returns true only if all the labels match, also returns the count of matching labels. +func (r *multiCIDRRangeAllocator) matchCIDRLabels(node *v1.Node, label []byte) (bool, int, error) { + var labelSet labels.Set + var matchCnt int + + labelsMatch := false + selector := &v1.NodeSelector{} + err := selector.Unmarshal(label) + if err != nil { + klog.Errorf("Unable to unmarshal node selector for label %v: %v", label, err) + return labelsMatch, 0, err + } + + ls, err := v1helper.NodeSelectorAsSelector(selector) + if err != nil { + klog.Errorf("Unable to convert NodeSelector to labels.Selector: %v", err) + return labelsMatch, 0, err + } + reqs, selectable := ls.Requirements() + + labelSet = node.ObjectMeta.Labels + if selectable { + matchCnt = 0 + for _, req := range reqs { + if req.Matches(labelSet) { + matchCnt += 1 + } + } + if matchCnt == len(reqs) { + labelsMatch = true + } + } + return labelsMatch, matchCnt, err +} + +// Methods for handling ClusterCIDRs. + +// createDefaultClusterCIDR creates a default ClusterCIDR if --cluster-cidr has +// been configured. 
It converts the --cluster-cidr and --per-node-mask-size* flags +// to appropriate ClusterCIDR fields. +func createDefaultClusterCIDR(existingConfigList *networkingv1alpha1.ClusterCIDRList, + allocatorParams CIDRAllocatorParams) { + // Create default ClusterCIDR only if --cluster-cidr has been configured + if len(allocatorParams.ClusterCIDRs) == 0 { + return + } + + for _, clusterCIDR := range existingConfigList.Items { + if clusterCIDR.Name == defaultClusterCIDRName { + // Default ClusterCIDR already exists, no further action required. + klog.V(3).Infof("Default ClusterCIDR %s already exists", defaultClusterCIDRName) + return + } + } + + // Create a default ClusterCIDR as it is not already created. + defaultCIDRConfig := &networkingv1alpha1.ClusterCIDR{ + TypeMeta: metav1.TypeMeta{ + APIVersion: defaultClusterCIDRAPIVersion, + Kind: "ClusterCIDR", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: defaultClusterCIDRName, + }, + Spec: networkingv1alpha1.ClusterCIDRSpec{ + PerNodeHostBits: minPerNodeHostBits, + }, + } + + ipv4PerNodeHostBits := int32(math.MinInt32) + ipv6PerNodeHostBits := int32(math.MinInt32) + isDualstack := false + if len(allocatorParams.ClusterCIDRs) == 2 { + isDualstack = true + } + + for i, cidr := range allocatorParams.ClusterCIDRs { + if netutil.IsIPv4CIDR(cidr) { + defaultCIDRConfig.Spec.IPv4 = cidr.String() + ipv4PerNodeHostBits = ipv4MaxCIDRMask - int32(allocatorParams.NodeCIDRMaskSizes[i]) + if !isDualstack && ipv4PerNodeHostBits > minPerNodeHostBits { + defaultCIDRConfig.Spec.PerNodeHostBits = ipv4PerNodeHostBits + } + } else if netutil.IsIPv6CIDR(cidr) { + defaultCIDRConfig.Spec.IPv6 = cidr.String() + ipv6PerNodeHostBits = ipv6MaxCIDRMask - int32(allocatorParams.NodeCIDRMaskSizes[i]) + if !isDualstack && ipv6PerNodeHostBits > minPerNodeHostBits { + defaultCIDRConfig.Spec.PerNodeHostBits = ipv6PerNodeHostBits + } + } + } + + if isDualstack { + // In case of dualstack CIDRs, currently the default values for PerNodeMaskSize are + // 24 for IPv4 (PerNodeHostBits=8) and 64 for IPv6(PerNodeHostBits=64), there is no + // requirement for the PerNodeHostBits to be equal for IPv4 and IPv6, However with + // the introduction of ClusterCIDRs, we enforce the requirement for a single + // PerNodeHostBits field, thus we choose the minimum PerNodeHostBits value, to avoid + // overflow for IPv4 CIDRs. + if ipv4PerNodeHostBits >= minPerNodeHostBits && ipv4PerNodeHostBits <= ipv6PerNodeHostBits { + defaultCIDRConfig.Spec.PerNodeHostBits = ipv4PerNodeHostBits + } else if ipv6PerNodeHostBits >= minPerNodeHostBits && ipv6PerNodeHostBits <= ipv4MaxCIDRMask { + defaultCIDRConfig.Spec.PerNodeHostBits = ipv6PerNodeHostBits + } + } + + existingConfigList.Items = append(existingConfigList.Items, *defaultCIDRConfig) + + return +} + +// reconcileCreate handles create ClusterCIDR events. +func (r *multiCIDRRangeAllocator) reconcileCreate(clusterCIDR *networkingv1alpha1.ClusterCIDR) error { + r.lock.Lock() + defer r.lock.Unlock() + + if needToAddFinalizer(clusterCIDR, clusterCIDRFinalizer) { + klog.V(3).Infof("Creating ClusterCIDR %s", clusterCIDR.Name) + if err := r.createClusterCIDR(clusterCIDR, false); err != nil { + klog.Errorf("Unable to create ClusterCIDR %s : %v", clusterCIDR.Name, err) + return err + } + } + return nil +} + +// reconcileBootstrap handles creation of existing ClusterCIDRs. +// adds a finalizer if not already present. 
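+// ClusterCIDRs whose spec was modified after creation (Generation > 1) are
+// still mapped, but marked as terminating so no new CIDRs are allocated from
+// them.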
+func (r *multiCIDRRangeAllocator) reconcileBootstrap(clusterCIDR *networkingv1alpha1.ClusterCIDR) error { + r.lock.Lock() + defer r.lock.Unlock() + + terminating := false + // Create the ClusterCIDR only if the Spec has not been modified. + if clusterCIDR.Generation > 1 { + terminating = true + err := fmt.Errorf("CIDRs from ClusterCIDR %s will not be used for allocation as it was modified", clusterCIDR.Name) + klog.Errorf("ClusterCIDR Modified: %v", err) + } + + klog.V(2).Infof("Creating ClusterCIDR %s during bootstrap", clusterCIDR.Name) + if err := r.createClusterCIDR(clusterCIDR, terminating); err != nil { + klog.Errorf("Unable to create ClusterCIDR %s: %v", clusterCIDR.Name, err) + return err + } + + return nil +} + +// createClusterCIDR creates and maps the cidrSets in the cidrMap. +func (r *multiCIDRRangeAllocator) createClusterCIDR(clusterCIDR *networkingv1alpha1.ClusterCIDR, terminating bool) error { + nodeSelector, err := r.nodeSelectorKey(clusterCIDR) + if err != nil { + return fmt.Errorf("unable to get labelSelector key: %w", err) + } + + clusterCIDRSet, err := r.createClusterCIDRSet(clusterCIDR, terminating) + if err != nil { + return fmt.Errorf("invalid ClusterCIDR: %w", err) + } + + if clusterCIDRSet.IPv4CIDRSet == nil && clusterCIDRSet.IPv6CIDRSet == nil { + return errors.New("invalid ClusterCIDR: must provide IPv4 and/or IPv6 config") + } + + if err := r.mapClusterCIDRSet(r.cidrMap, nodeSelector, clusterCIDRSet); err != nil { + return fmt.Errorf("unable to map clusterCIDRSet: %w", err) + } + + // Make a copy so we don't mutate the shared informer cache. + updatedClusterCIDR := clusterCIDR.DeepCopy() + if needToAddFinalizer(clusterCIDR, clusterCIDRFinalizer) { + updatedClusterCIDR.ObjectMeta.Finalizers = append(clusterCIDR.ObjectMeta.Finalizers, clusterCIDRFinalizer) + } + + if updatedClusterCIDR.ResourceVersion == "" { + // Create is only used for creating default ClusterCIDR. + if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Create(context.TODO(), updatedClusterCIDR, metav1.CreateOptions{}); err != nil { + klog.V(2).Infof("Error creating ClusterCIDR %s: %v", clusterCIDR.Name, err) + return err + } + } else { + // Update the ClusterCIDR object when called from reconcileCreate. + if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(context.TODO(), updatedClusterCIDR, metav1.UpdateOptions{}); err != nil { + klog.V(2).Infof("Error creating ClusterCIDR %s: %v", clusterCIDR.Name, err) + return err + } + } + + return nil +} + +// createClusterCIDRSet creates and returns new cidrset.ClusterCIDR based on ClusterCIDR API object. 
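+// A MultiCIDRSet is built per configured IP family; a family that is not set
+// in the spec leaves the corresponding CIDRSet nil.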
+func (r *multiCIDRRangeAllocator) createClusterCIDRSet(clusterCIDR *networkingv1alpha1.ClusterCIDR, terminating bool) (*cidrset.ClusterCIDR, error) { + + clusterCIDRSet := &cidrset.ClusterCIDR{ + Name: clusterCIDR.Name, + AssociatedNodes: make(map[string]bool, 0), + Terminating: terminating, + } + + if clusterCIDR.Spec.IPv4 != "" { + _, ipv4CIDR, err := netutil.ParseCIDRSloppy(clusterCIDR.Spec.IPv4) + if err != nil { + return nil, fmt.Errorf("unable to parse provided IPv4 CIDR: %w", err) + } + clusterCIDRSet.IPv4CIDRSet, err = cidrset.NewMultiCIDRSet(ipv4CIDR, int(clusterCIDR.Spec.PerNodeHostBits)) + if err != nil { + return nil, fmt.Errorf("unable to create IPv4 cidrSet: %w", err) + } + } + + if clusterCIDR.Spec.IPv6 != "" { + _, ipv6CIDR, err := netutil.ParseCIDRSloppy(clusterCIDR.Spec.IPv6) + if err != nil { + return nil, fmt.Errorf("unable to parse provided IPv6 CIDR: %w", err) + } + clusterCIDRSet.IPv6CIDRSet, err = cidrset.NewMultiCIDRSet(ipv6CIDR, int(clusterCIDR.Spec.PerNodeHostBits)) + if err != nil { + return nil, fmt.Errorf("unable to create IPv6 cidrSet: %w", err) + } + } + + return clusterCIDRSet, nil +} + +// mapClusterCIDRSet maps the ClusterCIDRSet to the provided labelSelector in the cidrMap. +func (r *multiCIDRRangeAllocator) mapClusterCIDRSet(cidrMap map[string][]*cidrset.ClusterCIDR, nodeSelector string, clusterCIDRSet *cidrset.ClusterCIDR) error { + if clusterCIDRSet == nil { + return errors.New("invalid clusterCIDRSet, clusterCIDRSet cannot be nil") + } + + if clusterCIDRSetList, ok := cidrMap[nodeSelector]; ok { + cidrMap[nodeSelector] = append(clusterCIDRSetList, clusterCIDRSet) + } else { + cidrMap[nodeSelector] = []*cidrset.ClusterCIDR{clusterCIDRSet} + } + return nil +} + +// reconcileDelete deletes the ClusterCIDR object and removes the finalizer. +func (r *multiCIDRRangeAllocator) reconcileDelete(clusterCIDR *networkingv1alpha1.ClusterCIDR) error { + r.lock.Lock() + defer r.lock.Unlock() + + if slice.ContainsString(clusterCIDR.GetFinalizers(), clusterCIDRFinalizer, nil) { + if err := r.deleteClusterCIDR(clusterCIDR); err != nil { + return err + } + // Remove the finalizer as delete is successful. + cccCopy := clusterCIDR.DeepCopy() + cccCopy.ObjectMeta.Finalizers = slice.RemoveString(cccCopy.ObjectMeta.Finalizers, clusterCIDRFinalizer, nil) + if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(context.TODO(), clusterCIDR, metav1.UpdateOptions{}); err != nil { + klog.V(2).Infof("Error removing finalizer for ClusterCIDR %s: %v", clusterCIDR.Name, err) + return err + } + klog.V(2).Infof("Removed finalizer for ClusterCIDR %s", clusterCIDR.Name) + } + return nil +} + +// deleteClusterCIDR Deletes and unmaps the ClusterCIDRs from the cidrMap. +func (r *multiCIDRRangeAllocator) deleteClusterCIDR(clusterCIDR *networkingv1alpha1.ClusterCIDR) error { + + labelSelector, err := r.nodeSelectorKey(clusterCIDR) + if err != nil { + return fmt.Errorf("unable to delete cidr: %w", err) + } + + clusterCIDRSetList, ok := r.cidrMap[labelSelector] + if !ok { + klog.Infof("Label %s not found in CIDRMap, proceeding with delete", labelSelector) + return nil + } + + for i, clusterCIDRSet := range clusterCIDRSetList { + if clusterCIDRSet.Name != clusterCIDR.Name { + continue + } + + // Mark clusterCIDRSet as terminating. + clusterCIDRSet.Terminating = true + + // Allow deletion only if no nodes are associated with the ClusterCIDR. 
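+		// The Terminating flag set above remains in place even if deletion is
+		// rejected here, so no further allocations are made while nodes drain.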
+ if len(clusterCIDRSet.AssociatedNodes) > 0 { + return fmt.Errorf("ClusterCIDRSet %s marked as terminating, won't be deleted until all associated nodes are deleted", clusterCIDR.Name) + } + + // Remove the label from the map if this was the only clusterCIDR associated + // with it. + if len(clusterCIDRSetList) == 1 { + delete(r.cidrMap, labelSelector) + return nil + } + + clusterCIDRSetList = append(clusterCIDRSetList[:i], clusterCIDRSetList[i+1:]...) + r.cidrMap[labelSelector] = clusterCIDRSetList + return nil + } + klog.V(2).Info("clusterCIDR not found, proceeding with delete", "Name", clusterCIDR.Name, "label", labelSelector) + return nil +} + +func (r *multiCIDRRangeAllocator) nodeSelectorKey(clusterCIDR *networkingv1alpha1.ClusterCIDR) (string, error) { + var nodeSelector []byte + var err error + + if clusterCIDR.Spec.NodeSelector != nil { + nodeSelector, err = clusterCIDR.Spec.NodeSelector.Marshal() + } else { + nodeSelector, err = defaultNodeSelector() + } + + if err != nil { + return "", err + } + + return string(nodeSelector), nil +} + +func listClusterCIDRs(kubeClient clientset.Interface) (*networkingv1alpha1.ClusterCIDRList, error) { + var clusterCIDRList *networkingv1alpha1.ClusterCIDRList + // We must poll because apiserver might not be up. This error causes + // controller manager to restart. + startTimestamp := time.Now() + + // start with 2s, multiply the duration by 1.6 each step, 11 steps = 9.7 minutes + backoff := wait.Backoff{ + Duration: 2 * time.Second, + Factor: 1.6, + Steps: 11, + } + + if pollErr := wait.ExponentialBackoff(backoff, func() (bool, error) { + var err error + clusterCIDRList, err = kubeClient.NetworkingV1alpha1().ClusterCIDRs().List(context.TODO(), metav1.ListOptions{ + FieldSelector: fields.Everything().String(), + LabelSelector: labels.Everything().String(), + }) + if err != nil { + klog.Errorf("Failed to list all clusterCIDRs: %v", err) + return false, nil + } + return true, nil + }); pollErr != nil { + klog.Errorf("Failed to list clusterCIDRs (after %v)", time.Now().Sub(startTimestamp)) + return nil, fmt.Errorf("failed to list all clusterCIDRs in %v, cannot proceed without updating CIDR map", + apiserverStartupGracePeriod) + } + return clusterCIDRList, nil +} diff --git a/pkg/controller/nodeipam/ipam/multi_cidr_range_allocator_test.go b/pkg/controller/nodeipam/ipam/multi_cidr_range_allocator_test.go new file mode 100644 index 00000000000..6569b0a5d13 --- /dev/null +++ b/pkg/controller/nodeipam/ipam/multi_cidr_range_allocator_test.go @@ -0,0 +1,1868 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package ipam + +import ( + "context" + "fmt" + "net" + "testing" + "time" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + networkingv1alpha1 "k8s.io/api/networking/v1alpha1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/rand" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" + "k8s.io/client-go/tools/cache" + "k8s.io/kubernetes/pkg/controller" + cidrset "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/multicidrset" + "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/test" + "k8s.io/kubernetes/pkg/controller/testutil" + utilnet "k8s.io/utils/net" +) + +type testCaseMultiCIDR struct { + description string + fakeNodeHandler *testutil.FakeNodeHandler + allocatorParams CIDRAllocatorParams + testCIDRMap map[string][]*cidrset.ClusterCIDR + // key is index of the cidr allocated. + expectedAllocatedCIDR map[int]string + allocatedCIDRs map[int][]string + // should controller creation fail? + ctrlCreateFail bool +} + +type testClusterCIDR struct { + perNodeHostBits int32 + ipv4CIDR string + ipv6CIDR string + name string +} + +type testNodeSelectorRequirement struct { + key string + operator v1.NodeSelectorOperator + values []string +} + +func getTestNodeSelector(requirements []testNodeSelectorRequirement) string { + testNodeSelector := &v1.NodeSelector{} + + for _, nsr := range requirements { + nst := v1.NodeSelectorTerm{ + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: nsr.key, + Operator: nsr.operator, + Values: nsr.values, + }, + }, + } + testNodeSelector.NodeSelectorTerms = append(testNodeSelector.NodeSelectorTerms, nst) + } + + marshalledSelector, _ := testNodeSelector.Marshal() + return string(marshalledSelector) +} + +func getTestCidrMap(testClusterCIDRMap map[string][]*testClusterCIDR) map[string][]*cidrset.ClusterCIDR { + cidrMap := make(map[string][]*cidrset.ClusterCIDR, 0) + for labels, testClusterCIDRList := range testClusterCIDRMap { + clusterCIDRList := make([]*cidrset.ClusterCIDR, 0) + for _, testClusterCIDR := range testClusterCIDRList { + clusterCIDR := &cidrset.ClusterCIDR{ + Name: testClusterCIDR.name, + AssociatedNodes: make(map[string]bool, 0), + } + + if testClusterCIDR.ipv4CIDR != "" { + _, testCIDR, _ := utilnet.ParseCIDRSloppy(testClusterCIDR.ipv4CIDR) + testCIDRSet, _ := cidrset.NewMultiCIDRSet(testCIDR, int(testClusterCIDR.perNodeHostBits)) + clusterCIDR.IPv4CIDRSet = testCIDRSet + } + if testClusterCIDR.ipv6CIDR != "" { + _, testCIDR, _ := utilnet.ParseCIDRSloppy(testClusterCIDR.ipv6CIDR) + testCIDRSet, _ := cidrset.NewMultiCIDRSet(testCIDR, int(testClusterCIDR.perNodeHostBits)) + clusterCIDR.IPv6CIDRSet = testCIDRSet + } + clusterCIDRList = append(clusterCIDRList, clusterCIDR) + } + cidrMap[labels] = clusterCIDRList + } + return cidrMap +} + +func getClusterCIDRList(nodeName string, cidrMap map[string][]*cidrset.ClusterCIDR) ([]*cidrset.ClusterCIDR, error) { + labelSelector := getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{nodeName}, + }, + }) + if clusterCIDRList, ok := cidrMap[labelSelector]; ok { + return clusterCIDRList, nil + } + return nil, fmt.Errorf("unable to get clusterCIDR for node: %s", nodeName) +} + +func TestMultiCIDROccupyPreExistingCIDR(t *testing.T) { + // all tests operate on a single node. 
+ testCaseMultiCIDRs := []testCaseMultiCIDR{ + { + description: "success, single stack no node allocation", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "single-stack-cidr", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/16", + }, + }, + }), + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: false, + }, + { + description: "success, dual stack no node allocation", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "dual-stack-cidr", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/16", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + }), + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: false, + }, + { + description: "success, single stack correct node allocation", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.0.1/24"}, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "single-stack-cidr-allocated", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/16", + }, + }, + }), + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: false, + }, + { + description: "success, dual stack both allocated correctly", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.0.1/24", "ace:cab:deca::1/120"}, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "dual-stack-cidr-allocated", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/16", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + }), + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: false, + }, + // failure cases. 
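+		// Each failure case below pre-assigns a PodCIDR that no matching
+		// ClusterCIDR can occupy, so allocator creation is expected to fail
+		// (ctrlCreateFail: true).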
+ { + description: "fail, single stack incorrect node allocation", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"172.10.0.1/24"}, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "single-stack-cidr-allocate-fail", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/16", + }, + }, + }), + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: true, + }, + { + description: "fail, dualstack node allocating from non existing cidr", + + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.0.1/24", "a00::/86"}, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "dual-stack-cidr-allocate-fail", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/16", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + }), + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: true, + }, + { + description: "fail, dualstack node allocating bad v4", + + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"172.10.0.1/24", "ace:cab:deca::1/120"}, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "dual-stack-cidr-bad-v4", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/16", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + }), + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: true, + }, + { + description: "fail, dualstack node allocating bad v6", + + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.0.1/24", "cdd::/86"}, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "dual-stack-cidr-bad-v6", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/16", + ipv6CIDR: 
"ace:cab:deca::/112", + }, + }, + }), + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: true, + }, + } + + // test function + for _, tc := range testCaseMultiCIDRs { + t.Run(tc.description, func(t *testing.T) { + // Initialize the range allocator. + fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler) + fakeClient := &fake.Clientset{} + fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) + fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs() + nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{}) + + _, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap) + if err == nil && tc.ctrlCreateFail { + t.Fatalf("creating range allocator was expected to fail, but it did not") + } + if err != nil && !tc.ctrlCreateFail { + t.Fatalf("creating range allocator was expected to succeed, but it did not") + } + }) + } +} + +func TestMultiCIDRAllocateOrOccupyCIDRSuccess(t *testing.T) { + // Non-parallel test (overrides global var). + oldNodePollInterval := nodePollInterval + nodePollInterval = test.NodePollInterval + defer func() { + nodePollInterval = oldNodePollInterval + }() + + // all tests operate on a single node. + testCaseMultiCIDRs := []testCaseMultiCIDR{ + { + description: "When there's no ServiceCIDR return first CIDR in range", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "single-stack-cidr", + perNodeHostBits: 2, + ipv4CIDR: "127.123.234.0/24", + }, + }, + }), + expectedAllocatedCIDR: map[int]string{ + 0: "127.123.234.0/30", + }, + }, + { + description: "Correctly filter out ServiceCIDR", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + NodeCIDRMaskSizes: []int{30}, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "single-stack-cidr", + perNodeHostBits: 2, + ipv4CIDR: "127.123.234.0/24", + }, + }, + }), + // it should return first /30 CIDR after service range. 
+ expectedAllocatedCIDR: map[int]string{ + 0: "127.123.234.64/30", + }, + }, + { + description: "Correctly ignore already allocated CIDRs", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "single-stack-cidr", + perNodeHostBits: 2, + ipv4CIDR: "127.123.234.0/24", + }, + }, + }), + allocatedCIDRs: map[int][]string{ + 0: {"127.123.234.64/30", "127.123.234.68/30", "127.123.234.72/30", "127.123.234.80/30"}, + }, + expectedAllocatedCIDR: map[int]string{ + 0: "127.123.234.76/30", + }, + }, + { + description: "Dualstack CIDRs, prioritize clusterCIDR with higher label match count", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + "testLabel-1": "label1", + "testLabel-2": "label2", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "dual-stack-cidr-1", + perNodeHostBits: 8, + ipv4CIDR: "10.0.0.0/8", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-1", + operator: v1.NodeSelectorOpIn, + values: []string{"label1"}, + }, + }): { + { + name: "dual-stack-cidr-2", + perNodeHostBits: 8, + ipv4CIDR: "127.123.234.0/8", + ipv6CIDR: "abc:def:deca::/112", + }, + }, + }), + expectedAllocatedCIDR: map[int]string{ + 0: "127.0.0.0/24", + 1: "abc:def:deca::/120", + }, + }, + { + description: "Dualstack CIDRs, prioritize clusterCIDR with higher label match count, overlapping CIDRs", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + "testLabel-1": "label1", + "testLabel-2": "label2", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "dual-stack-cidr-1", + perNodeHostBits: 8, + ipv4CIDR: "10.0.0.0/8", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + getTestNodeSelector([]testNodeSelectorRequirement{ + { + 
key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-1", + operator: v1.NodeSelectorOpIn, + values: []string{"label1"}, + }, + }): { + { + name: "dual-stack-cidr-2", + perNodeHostBits: 8, + ipv4CIDR: "10.0.0.0/16", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + }), + allocatedCIDRs: map[int][]string{ + 0: {"10.0.0.0/24", "10.0.1.0/24", "10.0.2.0/24", "10.0.4.0/24"}, + 1: {"ace:cab:deca::/120"}, + }, + expectedAllocatedCIDR: map[int]string{ + 0: "10.0.3.0/24", + 1: "ace:cab:deca::100/120", + }, + }, + { + description: "Dualstack CIDRs, clusterCIDR with equal label match count, prioritize clusterCIDR with fewer allocatable pod CIDRs", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + "testLabel-1": "label1", + "testLabel-2": "label2", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-1", + operator: v1.NodeSelectorOpIn, + values: []string{"label1"}, + }, + }): { + { + name: "dual-stack-cidr-1", + perNodeHostBits: 8, + ipv4CIDR: "127.123.234.0/8", + ipv6CIDR: "abc:def:deca::/112", + }, + }, + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-2", + operator: v1.NodeSelectorOpIn, + values: []string{"label2"}, + }, + }): { + { + name: "dual-stack-cidr-2", + perNodeHostBits: 8, + ipv4CIDR: "10.0.0.0/24", + ipv6CIDR: "ace:cab:deca::/120", + }, + }, + }), + expectedAllocatedCIDR: map[int]string{ + 0: "10.0.0.0/24", + 1: "ace:cab:deca::/120", + }, + }, + { + description: "Dualstack CIDRs, clusterCIDR with equal label count, non comparable allocatable pod CIDRs, prioritize clusterCIDR with lower perNodeMaskSize", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + "testLabel-1": "label1", + "testLabel-2": "label2", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-1", + operator: v1.NodeSelectorOpIn, + values: []string{"label1"}, + }, + }): { + { + name: "dual-stack-cidr-1", + perNodeHostBits: 8, + ipv4CIDR: "127.123.234.0/23", + }, + }, + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-2", + operator: v1.NodeSelectorOpIn, + values: []string{"label2"}, + }, + }): { + { + name: "dual-stack-cidr-2", + perNodeHostBits: 8, + ipv4CIDR: "10.0.0.0/16", + ipv6CIDR: 
"ace:cab:deca::/120", + }, + }, + }), + expectedAllocatedCIDR: map[int]string{ + 0: "10.0.0.0/24", + 1: "ace:cab:deca::/120", + }, + }, + { + description: "Dualstack CIDRs, clusterCIDR with equal label count and allocatable pod CIDRs, prioritize clusterCIDR with lower perNodeMaskSize", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + "testLabel-1": "label1", + "testLabel-2": "label2", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-1", + operator: v1.NodeSelectorOpIn, + values: []string{"label1"}, + }, + }): { + { + name: "dual-stack-cidr-1", + perNodeHostBits: 8, + ipv4CIDR: "127.123.234.0/24", + ipv6CIDR: "abc:def:deca::/120", + }, + }, + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-2", + operator: v1.NodeSelectorOpIn, + values: []string{"label2"}, + }, + }): { + { + name: "dual-stack-cidr-2", + perNodeHostBits: 0, + ipv4CIDR: "10.0.0.0/32", + ipv6CIDR: "ace:cab:deca::/128", + }, + }, + }), + expectedAllocatedCIDR: map[int]string{ + 0: "10.0.0.0/32", + 1: "ace:cab:deca::/128", + }, + }, + { + description: "Dualstack CIDRs, clusterCIDR with equal label count, allocatable pod CIDRs and allocatable IPs, prioritize clusterCIDR with lower alphanumeric label", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + "testLabel-1": "label1", + "testLabel-2": "label2", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-1", + operator: v1.NodeSelectorOpIn, + values: []string{"label1"}, + }, + }): { + { + name: "dual-stack-cidr-1", + perNodeHostBits: 8, + ipv4CIDR: "127.123.234.0/16", + ipv6CIDR: "abc:def:deca::/112", + }, + }, + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-2", + operator: v1.NodeSelectorOpIn, + values: []string{"label2"}, + }, + }): { + { + name: "dual-stack-cidr-2", + perNodeHostBits: 8, + ipv4CIDR: "10.0.0.0/16", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + }), + expectedAllocatedCIDR: map[int]string{ + 0: "127.123.0.0/24", + 1: "abc:def:deca::/120", + }, + }, + { + description: "Dualstack CIDRs, clusterCIDR with equal label count, allocatable pod CIDRs, allocatable IPs and labels, prioritize clusterCIDR with smaller IP", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + 
ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + "testLabel-1": "label1", + "testLabel-2": "label2", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: func() *net.IPNet { + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("127.123.234.0/26") + return serviceCIDR + }(), + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-1", + operator: v1.NodeSelectorOpIn, + values: []string{"label1"}, + }, + }): { + { + name: "dual-stack-cidr-1", + perNodeHostBits: 8, + ipv4CIDR: "127.123.234.0/16", + ipv6CIDR: "abc:def:deca::/112", + }, + }, + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + { + key: "testLabel-1", + operator: v1.NodeSelectorOpIn, + values: []string{"label1"}, + }, + }): { + { + name: "dual-stack-cidr-2", + perNodeHostBits: 8, + ipv4CIDR: "10.0.0.0/16", + ipv6CIDR: "ace:cab:deca::/112", + }, + }, + }), + expectedAllocatedCIDR: map[int]string{ + 0: "10.0.0.0/24", + 1: "ace:cab:deca::/120", + }, + }, + { + description: "no double counting", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "nodepool1", + }, + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.0.0/24"}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Labels: map[string]string{ + "testLabel-0": "nodepool1", + }, + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.2.0/24"}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + Labels: map[string]string{ + "testLabel-0": "nodepool1", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"nodepool1"}, + }, + }): { + { + name: "no-double-counting", + perNodeHostBits: 8, + ipv4CIDR: "10.10.0.0/22", + }, + }, + }), + expectedAllocatedCIDR: map[int]string{ + 0: "10.10.1.0/24", + }, + }, + } + + // test function + testFunc := func(tc testCaseMultiCIDR) { + nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{}) + // Initialize the range allocator. 
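+ // The ClusterCIDR informer is backed by its own fake clientset below; node objects come from the FakeNodeHandler via test.FakeNodeInformer in the constructor call.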
+ + fakeClient := &fake.Clientset{} + fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) + fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs() + allocator, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap) + if err != nil { + t.Errorf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err) + return + } + rangeAllocator, ok := allocator.(*multiCIDRRangeAllocator) + if !ok { + t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) + return + } + rangeAllocator.nodesSynced = test.AlwaysReady + rangeAllocator.recorder = testutil.NewFakeRecorder() + + // this is a bit of white box testing + // pre allocate the CIDRs as per the test + for _, allocatedList := range tc.allocatedCIDRs { + for _, allocated := range allocatedList { + _, cidr, err := utilnet.ParseCIDRSloppy(allocated) + if err != nil { + t.Fatalf("%v: unexpected error when parsing CIDR %v: %v", tc.description, allocated, err) + } + + clusterCIDRList, err := getClusterCIDRList("node0", rangeAllocator.cidrMap) + if err != nil { + t.Fatalf("%v: unexpected error when getting associated clusterCIDR for node %v %v", tc.description, "node0", err) + } + + occupied := false + for _, clusterCIDR := range clusterCIDRList { + if err := rangeAllocator.Occupy(clusterCIDR, cidr); err == nil { + occupied = true + break + } + } + if !occupied { + t.Fatalf("%v: unable to occupy CIDR %v", tc.description, allocated) + } + } + } + + updateCount := 0 + for _, node := range tc.fakeNodeHandler.Existing { + if node.Spec.PodCIDRs == nil { + updateCount++ + } + if err := allocator.AllocateOrOccupyCIDR(node); err != nil { + t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) + } + } + if updateCount != 1 { + t.Fatalf("test error: all tests must update exactly one node") + } + if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, updateCount, wait.ForeverTestTimeout); err != nil { + t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) + } + + if len(tc.expectedAllocatedCIDR) == 0 { + // nothing further expected + return + } + for _, updatedNode := range tc.fakeNodeHandler.GetUpdatedNodesCopy() { + if len(updatedNode.Spec.PodCIDRs) == 0 { + continue // not assigned yet + } + //match + for podCIDRIdx, expectedPodCIDR := range tc.expectedAllocatedCIDR { + if updatedNode.Spec.PodCIDRs[podCIDRIdx] != expectedPodCIDR { + t.Errorf("%v: Unable to find allocated CIDR %v, found updated Nodes with CIDRs: %v", tc.description, expectedPodCIDR, updatedNode.Spec.PodCIDRs) + break + } + } + } + } + + // run the test cases + for _, tc := range testCaseMultiCIDRs { + testFunc(tc) + } +} + +func TestMultiCIDRAllocateOrOccupyCIDRFailure(t *testing.T) { + testCaseMultiCIDRs := []testCaseMultiCIDR{ + { + description: "When there's no ServiceCIDR return first CIDR in range", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", 
+ operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "allocate-fail", + perNodeHostBits: 2, + ipv4CIDR: "127.123.234.0/28", + }, + }, + }), + allocatedCIDRs: map[int][]string{ + 0: {"127.123.234.0/30", "127.123.234.4/30", "127.123.234.8/30", "127.123.234.12/30"}, + }, + }, + } + + testFunc := func(tc testCaseMultiCIDR) { + fakeClient := &fake.Clientset{} + fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) + fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs() + + // Initialize the range allocator. + allocator, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap) + if err != nil { + t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err) + } + rangeAllocator, ok := allocator.(*multiCIDRRangeAllocator) + if !ok { + t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) + return + } + rangeAllocator.nodesSynced = test.AlwaysReady + rangeAllocator.recorder = testutil.NewFakeRecorder() + + // this is a bit of white box testing + // pre allocate the CIDRs as per the test + for _, allocatedList := range tc.allocatedCIDRs { + for _, allocated := range allocatedList { + _, cidr, err := utilnet.ParseCIDRSloppy(allocated) + if err != nil { + t.Fatalf("%v: unexpected error when parsing CIDR %v: %v", tc.description, allocated, err) + } + + clusterCIDRList, err := getClusterCIDRList("node0", rangeAllocator.cidrMap) + if err != nil { + t.Fatalf("%v: unexpected error when getting associated clusterCIDR for node %v %v", tc.description, "node0", err) + } + + occupied := false + for _, clusterCIDR := range clusterCIDRList { + if err := rangeAllocator.Occupy(clusterCIDR, cidr); err == nil { + occupied = true + break + } + } + if !occupied { + t.Fatalf("%v: unable to occupy CIDR %v", tc.description, allocated) + } + } + } + + if err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err == nil { + t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err) + } + // We don't expect any updates, so just sleep for some time + time.Sleep(time.Second) + if len(tc.fakeNodeHandler.GetUpdatedNodesCopy()) != 0 { + t.Fatalf("%v: unexpected update of nodes: %v", tc.description, tc.fakeNodeHandler.GetUpdatedNodesCopy()) + } + if len(tc.expectedAllocatedCIDR) == 0 { + // nothing further expected + return + } + for _, updatedNode := range tc.fakeNodeHandler.GetUpdatedNodesCopy() { + if len(updatedNode.Spec.PodCIDRs) == 0 { + continue // not assigned yet + } + //match + for podCIDRIdx, expectedPodCIDR := range tc.expectedAllocatedCIDR { + if updatedNode.Spec.PodCIDRs[podCIDRIdx] == expectedPodCIDR { + t.Errorf("%v: found cidr %v that should not be allocated on node with CIDRs:%v", tc.description, expectedPodCIDR, updatedNode.Spec.PodCIDRs) + break + } + } + } + } + for _, tc := range testCaseMultiCIDRs { + testFunc(tc) + } +} + +type releasetestCaseMultiCIDR struct { + description string + fakeNodeHandler *testutil.FakeNodeHandler + testCIDRMap map[string][]*cidrset.ClusterCIDR + allocatorParams CIDRAllocatorParams + expectedAllocatedCIDRFirstRound map[int]string + expectedAllocatedCIDRSecondRound map[int]string + allocatedCIDRs map[int][]string + cidrsToRelease [][]string +} + +func TestMultiCIDRReleaseCIDRSuccess(t *testing.T) { + // Non-parallel test (overrides global var) + 
oldNodePollInterval := nodePollInterval + nodePollInterval = test.NodePollInterval + defer func() { + nodePollInterval = oldNodePollInterval + }() + + testCaseMultiCIDRs := []releasetestCaseMultiCIDR{ + { + description: "Correctly release preallocated CIDR", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "cidr-release", + perNodeHostBits: 2, + ipv4CIDR: "127.123.234.0/28", + }, + }, + }), + allocatedCIDRs: map[int][]string{ + 0: {"127.123.234.0/30", "127.123.234.4/30", "127.123.234.8/30", "127.123.234.12/30"}, + }, + expectedAllocatedCIDRFirstRound: nil, + cidrsToRelease: [][]string{ + {"127.123.234.4/30"}, + }, + expectedAllocatedCIDRSecondRound: map[int]string{ + 0: "127.123.234.4/30", + }, + }, + { + description: "Correctly recycle CIDR", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ServiceCIDR: nil, + SecondaryServiceCIDR: nil, + }, + testCIDRMap: getTestCidrMap( + map[string][]*testClusterCIDR{ + getTestNodeSelector([]testNodeSelectorRequirement{ + { + key: "testLabel-0", + operator: v1.NodeSelectorOpIn, + values: []string{"node0"}, + }, + }): { + { + name: "cidr-release", + perNodeHostBits: 2, + ipv4CIDR: "127.123.234.0/28", + }, + }, + }), + allocatedCIDRs: map[int][]string{ + 0: {"127.123.234.4/30", "127.123.234.8/30", "127.123.234.12/30"}, + }, + expectedAllocatedCIDRFirstRound: map[int]string{ + 0: "127.123.234.0/30", + }, + cidrsToRelease: [][]string{ + {"127.123.234.0/30"}, + }, + expectedAllocatedCIDRSecondRound: map[int]string{ + 0: "127.123.234.0/30", + }, + }, + } + + testFunc := func(tc releasetestCaseMultiCIDR) { + fakeClient := &fake.Clientset{} + fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) + fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs() + // Initialize the range allocator. 
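+ // The error from NewMultiCIDRRangeAllocator is ignored here; the type assertion just below skips the white-box test if a non-default allocator is returned.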
+ allocator, _ := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap) + rangeAllocator, ok := allocator.(*multiCIDRRangeAllocator) + if !ok { + t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) + return + } + rangeAllocator.nodesSynced = test.AlwaysReady + rangeAllocator.recorder = testutil.NewFakeRecorder() + + // this is a bit of white box testing + for _, allocatedList := range tc.allocatedCIDRs { + for _, allocated := range allocatedList { + _, cidr, err := utilnet.ParseCIDRSloppy(allocated) + if err != nil { + t.Fatalf("%v: unexpected error when parsing CIDR %v: %v", tc.description, allocated, err) + } + + clusterCIDRList, err := getClusterCIDRList("node0", rangeAllocator.cidrMap) + if err != nil { + t.Fatalf("%v: unexpected error when getting associated clusterCIDR for node %v %v", tc.description, "node0", err) + } + + occupied := false + for _, clusterCIDR := range clusterCIDRList { + if err := rangeAllocator.Occupy(clusterCIDR, cidr); err == nil { + occupied = true + clusterCIDR.AssociatedNodes["fakeNode"] = true + break + } + } + if !occupied { + t.Fatalf("%v: unable to occupy CIDR %v", tc.description, allocated) + } + } + } + + err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]) + if len(tc.expectedAllocatedCIDRFirstRound) != 0 { + if err != nil { + t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) + } + if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { + t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) + } + } else { + if err == nil { + t.Fatalf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err) + } + // We don't expect any updates here + time.Sleep(time.Second) + if len(tc.fakeNodeHandler.GetUpdatedNodesCopy()) != 0 { + t.Fatalf("%v: unexpected update of nodes: %v", tc.description, tc.fakeNodeHandler.GetUpdatedNodesCopy()) + } + } + + for _, cidrToRelease := range tc.cidrsToRelease { + + nodeToRelease := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fakeNode", + Labels: map[string]string{ + "testLabel-0": "node0", + }, + }, + } + nodeToRelease.Spec.PodCIDRs = cidrToRelease + err = allocator.ReleaseCIDR(&nodeToRelease) + if err != nil { + t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err) + } + } + if err = allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil { + t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) + } + if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { + t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) + } + + if len(tc.expectedAllocatedCIDRSecondRound) == 0 { + // nothing further expected + return + } + for _, updatedNode := range tc.fakeNodeHandler.GetUpdatedNodesCopy() { + if len(updatedNode.Spec.PodCIDRs) == 0 { + continue // not assigned yet + } + //match + for podCIDRIdx, expectedPodCIDR := range tc.expectedAllocatedCIDRSecondRound { + if updatedNode.Spec.PodCIDRs[podCIDRIdx] != expectedPodCIDR { + t.Errorf("%v: found cidr %v that should not be allocated on node with CIDRs:%v", tc.description, expectedPodCIDR, updatedNode.Spec.PodCIDRs) + break + } + } + } + } + + for _, tc := range testCaseMultiCIDRs { + testFunc(tc) + } +} + +// ClusterCIDR tests. 
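
Before the ClusterCIDR controller tests that follow, it may help to see the object shape they build through makeClusterCIDR and makeNodeSelector (both defined further down). The sketch below is illustrative only and not part of the patch; the object name and the foo=bar selector are made up, while the fields mirror exactly what makeClusterCIDR sets.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	networkingv1alpha1 "k8s.io/api/networking/v1alpha1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	// Equivalent to makeClusterCIDR("dual-ccc-label", "10.3.0.0/16", "fd00:2::/112", 8, selector)
	// in the tests below: a dual-stack ClusterCIDR scoped to nodes labelled foo=bar.
	ccc := &networkingv1alpha1.ClusterCIDR{
		ObjectMeta: metav1.ObjectMeta{Name: "dual-ccc-label"},
		Spec: networkingv1alpha1.ClusterCIDRSpec{
			PerNodeHostBits: 8,
			IPv4:            "10.3.0.0/16",
			IPv6:            "fd00:2::/112",
			NodeSelector: &v1.NodeSelector{
				NodeSelectorTerms: []v1.NodeSelectorTerm{{
					MatchExpressions: []v1.NodeSelectorRequirement{{
						Key: "foo", Operator: v1.NodeSelectorOpIn, Values: []string{"bar"},
					}},
				}},
			},
		},
	}
	fmt.Println(ccc.Name, ccc.Spec.IPv4, ccc.Spec.IPv6)
}
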
+ +var alwaysReady = func() bool { return true } + +type clusterCIDRController struct { + *multiCIDRRangeAllocator + clusterCIDRStore cache.Store +} + +func newController() (*fake.Clientset, *clusterCIDRController) { + client := fake.NewSimpleClientset() + + informerFactory := informers.NewSharedInformerFactory(client, controller.NoResyncPeriodFunc()) + cccInformer := informerFactory.Networking().V1alpha1().ClusterCIDRs() + cccIndexer := cccInformer.Informer().GetIndexer() + + nodeInformer := informerFactory.Core().V1().Nodes() + + // These reactors are required to mock functionality that would be covered + // automatically if we weren't using the fake client. + client.PrependReactor("create", "clustercidrs", k8stesting.ReactionFunc(func(action k8stesting.Action) (bool, runtime.Object, error) { + clusterCIDR := action.(k8stesting.CreateAction).GetObject().(*networkingv1alpha1.ClusterCIDR) + + if clusterCIDR.ObjectMeta.GenerateName != "" { + clusterCIDR.ObjectMeta.Name = fmt.Sprintf("%s-%s", clusterCIDR.ObjectMeta.GenerateName, rand.String(8)) + clusterCIDR.ObjectMeta.GenerateName = "" + } + clusterCIDR.Generation = 1 + cccIndexer.Add(clusterCIDR) + + return false, clusterCIDR, nil + })) + client.PrependReactor("update", "clustercidrs", k8stesting.ReactionFunc(func(action k8stesting.Action) (bool, runtime.Object, error) { + clusterCIDR := action.(k8stesting.CreateAction).GetObject().(*networkingv1alpha1.ClusterCIDR) + clusterCIDR.Generation++ + cccIndexer.Update(clusterCIDR) + + return false, clusterCIDR, nil + })) + + _, clusterCIDR, _ := utilnet.ParseCIDRSloppy("192.168.0.0/16") + _, serviceCIDR, _ := utilnet.ParseCIDRSloppy("10.1.0.0/16") + + allocatorParams := CIDRAllocatorParams{ + ClusterCIDRs: []*net.IPNet{clusterCIDR}, + ServiceCIDR: serviceCIDR, + SecondaryServiceCIDR: nil, + NodeCIDRMaskSizes: []int{24}, + } + testCIDRMap := make(map[string][]*cidrset.ClusterCIDR, 0) + + // Initialize the range allocator. + ra, _ := NewMultiCIDRRangeAllocator(client, nodeInformer, cccInformer, allocatorParams, nil, testCIDRMap) + cccController := ra.(*multiCIDRRangeAllocator) + + cccController.clusterCIDRSynced = alwaysReady + + return client, &clusterCIDRController{ + cccController, + informerFactory.Networking().V1alpha1().ClusterCIDRs().Informer().GetStore(), + } +} + +// Ensure default ClusterCIDR is created during bootstrap. +func TestClusterCIDRDefault(t *testing.T) { + defaultCCC := makeClusterCIDR(defaultClusterCIDRName, "192.168.0.0/16", "", 8, nil) + + client, _ := newController() + createdCCC, err := client.NetworkingV1alpha1().ClusterCIDRs().Get(context.TODO(), defaultClusterCIDRName, metav1.GetOptions{}) + assert.Nil(t, err, "Expected no error getting clustercidr objects") + assert.Equal(t, defaultCCC.Spec, createdCCC.Spec) +} + +// Ensure SyncClusterCIDR creates a new valid ClusterCIDR. 
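
The newController helper above leans on PrependReactor to emulate API-server behaviour (generated names, generation bumps) that the fake client does not provide by itself. Below is a standalone sketch of that pattern; the Generation mutation and the handled=false convention are taken from the reactors above, everything else (the object name, the printed output) is purely illustrative.

package main

import (
	"context"
	"fmt"

	networkingv1alpha1 "k8s.io/api/networking/v1alpha1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/kubernetes/fake"
	k8stesting "k8s.io/client-go/testing"
)

func main() {
	client := fake.NewSimpleClientset()

	// Intercept "create" calls for clustercidrs and mutate the object the way an
	// API server would; returning handled=false lets the fake client's default
	// object tracker still store the (mutated) object afterwards.
	client.PrependReactor("create", "clustercidrs", func(action k8stesting.Action) (bool, runtime.Object, error) {
		ccc := action.(k8stesting.CreateAction).GetObject().(*networkingv1alpha1.ClusterCIDR)
		ccc.Generation = 1
		return false, ccc, nil
	})

	created, err := client.NetworkingV1alpha1().ClusterCIDRs().Create(
		context.TODO(),
		&networkingv1alpha1.ClusterCIDR{ObjectMeta: metav1.ObjectMeta{Name: "example-ccc"}},
		metav1.CreateOptions{},
	)
	if err != nil {
		panic(err)
	}
	// Generation was stamped by the reactor before the default tracker stored the object.
	fmt.Println(created.Name, created.Generation)
}
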
+func TestSyncClusterCIDRCreate(t *testing.T) { + tests := []struct { + name string + ccc *networkingv1alpha1.ClusterCIDR + wantErr bool + }{ + { + name: "valid IPv4 ClusterCIDR with no NodeSelector", + ccc: makeClusterCIDR("ipv4-ccc", "10.2.0.0/16", "", 8, nil), + wantErr: false, + }, + { + name: "valid IPv4 ClusterCIDR with NodeSelector", + ccc: makeClusterCIDR("ipv4-ccc-label", "10.3.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})), + wantErr: false, + }, + { + name: "valid IPv4 ClusterCIDR with overlapping CIDRs", + ccc: makeClusterCIDR("ipv4-ccc-overlap", "10.2.0.0/24", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})), + wantErr: false, + }, + { + name: "valid IPv6 ClusterCIDR with no NodeSelector", + ccc: makeClusterCIDR("ipv6-ccc", "", "fd00:1::/112", 8, nil), + wantErr: false, + }, + { + name: "valid IPv6 ClusterCIDR with NodeSelector", + ccc: makeClusterCIDR("ipv6-ccc-label", "", "fd00:2::/112", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})), + wantErr: false, + }, + { + name: "valid IPv6 ClusterCIDR with overlapping CIDRs", + ccc: makeClusterCIDR("ipv6-ccc-overlap", "", "fd00:1:1::/112", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})), + wantErr: false, + }, + { + name: "valid Dualstack ClusterCIDR with no NodeSelector", + ccc: makeClusterCIDR("dual-ccc", "10.2.0.0/16", "fd00:1::/112", 8, nil), + wantErr: false, + }, + { + name: "valid DualStack ClusterCIDR with NodeSelector", + ccc: makeClusterCIDR("dual-ccc-label", "10.3.0.0/16", "fd00:2::/112", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})), + wantErr: false, + }, + { + name: "valid Dualstack ClusterCIDR with overlapping CIDRs", + ccc: makeClusterCIDR("dual-ccc-overlap", "10.2.0.0/16", "fd00:1:1::/112", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})), + wantErr: false, + }, + // invalid ClusterCIDRs. + { + name: "invalid ClusterCIDR with both IPv4 and IPv6 CIDRs nil", + ccc: makeClusterCIDR("invalid-ccc", "", "", 0, nil), + wantErr: true, + }, + { + name: "invalid IPv4 ClusterCIDR", + ccc: makeClusterCIDR("invalid-ipv4-ccc", "1000.2.0.0/16", "", 8, nil), + wantErr: true, + }, + { + name: "invalid IPv6 ClusterCIDR", + ccc: makeClusterCIDR("invalid-ipv6-ccc", "", "aaaaa:1:1::/112", 8, nil), + wantErr: true, + }, + { + name: "invalid dualstack ClusterCIDR", + ccc: makeClusterCIDR("invalid-dual-ccc", "10.2.0.0/16", "aaaaa:1:1::/112", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})), + wantErr: true, + }, + } + + client, cccController := newController() + for _, tc := range tests { + cccController.clusterCIDRStore.Add(tc.ccc) + err := cccController.syncClusterCIDR(tc.ccc.Name) + if tc.wantErr { + assert.Error(t, err) + continue + } + assert.NoError(t, err) + expectActions(t, client.Actions(), 1, "create", "clustercidrs") + + createdCCC, err := client.NetworkingV1alpha1().ClusterCIDRs().Get(context.TODO(), tc.ccc.Name, metav1.GetOptions{}) + assert.Nil(t, err, "Expected no error getting clustercidr object") + assert.Equal(t, tc.ccc.Spec, createdCCC.Spec) + assert.Equal(t, []string{clusterCIDRFinalizer}, createdCCC.Finalizers) + } +} + +// Ensure syncClusterCIDR for ClusterCIDR delete removes the ClusterCIDR. 
+func TestSyncClusterCIDRDelete(t *testing.T) { + _, cccController := newController() + + testCCC := makeClusterCIDR("testing-1", "10.1.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})) + + cccController.clusterCIDRStore.Add(testCCC) + err := cccController.syncClusterCIDR(testCCC.Name) + assert.NoError(t, err) + + deletionTimestamp := metav1.Now() + testCCC.DeletionTimestamp = &deletionTimestamp + cccController.clusterCIDRStore.Update(testCCC) + err = cccController.syncClusterCIDR(testCCC.Name) + assert.NoError(t, err) +} + +// Ensure syncClusterCIDR for ClusterCIDR delete does not remove ClusterCIDR +// if a node is associated with the ClusterCIDR. +func TestSyncClusterCIDRDeleteWithNodesAssociated(t *testing.T) { + client, cccController := newController() + + testCCC := makeClusterCIDR("testing-1", "10.1.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})) + + cccController.clusterCIDRStore.Add(testCCC) + err := cccController.syncClusterCIDR(testCCC.Name) + assert.NoError(t, err) + + // Mock the IPAM controller behavior associating node with ClusterCIDR. + nodeSelectorKey, _ := cccController.nodeSelectorKey(testCCC) + clusterCIDRs, _ := cccController.cidrMap[nodeSelectorKey] + clusterCIDRs[0].AssociatedNodes["test-node"] = true + + createdCCC, err := client.NetworkingV1alpha1().ClusterCIDRs().Get(context.TODO(), testCCC.Name, metav1.GetOptions{}) + assert.Nil(t, err, "Expected no error getting clustercidr object") + + deletionTimestamp := metav1.Now() + createdCCC.DeletionTimestamp = &deletionTimestamp + cccController.clusterCIDRStore.Update(createdCCC) + err = cccController.syncClusterCIDR(createdCCC.Name) + assert.Error(t, err, fmt.Sprintf("ClusterCIDR %s marked as terminating, won't be deleted until all associated nodes are deleted", createdCCC.Name)) +} + +func expectActions(t *testing.T, actions []k8stesting.Action, num int, verb, resource string) { + t.Helper() + // if actions are less, the below logic will panic. + if num > len(actions) { + t.Fatalf("len of actions %v is unexpected. Expected to be at least %v", len(actions), num+1) + } + + for i := 0; i < num; i++ { + relativePos := len(actions) - i - 1 + assert.Equal(t, verb, actions[relativePos].GetVerb(), "Expected action -%d verb to be %s", i, verb) + assert.Equal(t, resource, actions[relativePos].GetResource().Resource, "Expected action -%d resource to be %s", i, resource) + } +} + +func makeNodeSelector(key string, op v1.NodeSelectorOperator, values []string) *v1.NodeSelector { + return &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: key, + Operator: op, + Values: values, + }, + }, + }, + }, + } +} + +// makeClusterCIDR returns a mock ClusterCIDR object. 
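
One note before makeClusterCIDR below: its PerNodeHostBits field drives every per-node PodCIDR size asserted in the tests above, since the per-node mask is simply the address length minus the host bits. A tiny standalone check of that arithmetic (illustrative, not part of the patch):

package main

import "fmt"

func main() {
	// perNodeHostBits leaves that many host bits per node, so the per-node mask is
	// (address length - perNodeHostBits). This matches the expectations above, e.g.
	// perNodeHostBits=2 on 127.123.234.0/24 yields /30 PodCIDRs, and
	// perNodeHostBits=8 on 10.10.0.0/16 and ace:cab:deca::/112 yields /24 and /120.
	for _, hostBits := range []int{2, 8} {
		fmt.Printf("hostBits=%d -> IPv4 /%d, IPv6 /%d\n", hostBits, 32-hostBits, 128-hostBits)
	}
}
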
+func makeClusterCIDR(cccName, ipv4CIDR, ipv6CIDR string, perNodeHostBits int32, nodeSelector *v1.NodeSelector) *networkingv1alpha1.ClusterCIDR { + testCCC := &networkingv1alpha1.ClusterCIDR{ + ObjectMeta: metav1.ObjectMeta{Name: cccName}, + Spec: networkingv1alpha1.ClusterCIDRSpec{}, + } + + testCCC.Spec.PerNodeHostBits = perNodeHostBits + + if ipv4CIDR != "" { + testCCC.Spec.IPv4 = ipv4CIDR + } + + if ipv6CIDR != "" { + testCCC.Spec.IPv6 = ipv6CIDR + } + + if nodeSelector != nil { + testCCC.Spec.NodeSelector = nodeSelector + } + + return testCCC +} diff --git a/pkg/controller/nodeipam/ipam/range_allocator.go b/pkg/controller/nodeipam/ipam/range_allocator.go index 3cf6794b228..54a0db9f51f 100644 --- a/pkg/controller/nodeipam/ipam/range_allocator.go +++ b/pkg/controller/nodeipam/ipam/range_allocator.go @@ -41,13 +41,6 @@ import ( controllerutil "k8s.io/kubernetes/pkg/controller/util/node" ) -// cidrs are reserved, then node resource is patched with them -// this type holds the reservation info for a node -type nodeReservedCIDRs struct { - allocatedCIDRs []*net.IPNet - nodeName string -} - type rangeAllocator struct { client clientset.Interface // cluster cidrs as passed in during controller creation @@ -333,7 +326,7 @@ func (r *rangeAllocator) updateCIDRsAllocation(data nodeReservedCIDRs) error { var err error var node *v1.Node defer r.removeNodeFromProcessing(data.nodeName) - cidrsString := cidrsAsString(data.allocatedCIDRs) + cidrsString := ipnetToStringList(data.allocatedCIDRs) node, err = r.nodeLister.Get(data.nodeName) if err != nil { klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDRs: %v", data.nodeName, err) @@ -391,12 +384,3 @@ func (r *rangeAllocator) updateCIDRsAllocation(data nodeReservedCIDRs) error { } return err } - -// converts a slice of cidrs into ,, -func cidrsAsString(inCIDRs []*net.IPNet) []string { - outCIDRs := make([]string, len(inCIDRs)) - for idx, inCIDR := range inCIDRs { - outCIDRs[idx] = inCIDR.String() - } - return outCIDRs -} diff --git a/pkg/controller/nodeipam/ipam/range_allocator_test.go b/pkg/controller/nodeipam/ipam/range_allocator_test.go index 0e7c452e01f..b0dd1c32f47 100644 --- a/pkg/controller/nodeipam/ipam/range_allocator_test.go +++ b/pkg/controller/nodeipam/ipam/range_allocator_test.go @@ -25,40 +25,12 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/informers" - coreinformers "k8s.io/client-go/informers/core/v1" "k8s.io/client-go/kubernetes/fake" - "k8s.io/kubernetes/pkg/controller" + "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/test" "k8s.io/kubernetes/pkg/controller/testutil" netutils "k8s.io/utils/net" ) -const testNodePollInterval = 10 * time.Millisecond - -var alwaysReady = func() bool { return true } - -func waitForUpdatedNodeWithTimeout(nodeHandler *testutil.FakeNodeHandler, number int, timeout time.Duration) error { - return wait.Poll(nodePollInterval, timeout, func() (bool, error) { - if len(nodeHandler.GetUpdatedNodesCopy()) >= number { - return true, nil - } - return false, nil - }) -} - -// Creates a fakeNodeInformer using the provided fakeNodeHandler. 
-func getFakeNodeInformer(fakeNodeHandler *testutil.FakeNodeHandler) coreinformers.NodeInformer { - fakeClient := &fake.Clientset{} - fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) - fakeNodeInformer := fakeInformerFactory.Core().V1().Nodes() - - for _, node := range fakeNodeHandler.Existing { - fakeNodeInformer.Informer().GetStore().Add(node) - } - - return fakeNodeInformer -} - type testCase struct { description string fakeNodeHandler *testutil.FakeNodeHandler @@ -305,7 +277,7 @@ func TestOccupyPreExistingCIDR(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { // Initialize the range allocator. - fakeNodeInformer := getFakeNodeInformer(tc.fakeNodeHandler) + fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler) nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{}) _, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList) if err == nil && tc.ctrlCreateFail { @@ -321,7 +293,7 @@ func TestOccupyPreExistingCIDR(t *testing.T) { func TestAllocateOrOccupyCIDRSuccess(t *testing.T) { // Non-parallel test (overrides global var) oldNodePollInterval := nodePollInterval - nodePollInterval = testNodePollInterval + nodePollInterval = test.NodePollInterval defer func() { nodePollInterval = oldNodePollInterval }() @@ -537,7 +509,7 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) { // test function testFunc := func(tc testCase) { - fakeNodeInformer := getFakeNodeInformer(tc.fakeNodeHandler) + fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler) nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{}) // Initialize the range allocator. allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList) @@ -550,7 +522,7 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) { t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) return } - rangeAllocator.nodesSynced = alwaysReady + rangeAllocator.nodesSynced = test.AlwaysReady rangeAllocator.recorder = testutil.NewFakeRecorder() go allocator.Run(wait.NeverStop) @@ -580,7 +552,7 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) { if updateCount != 1 { t.Fatalf("test error: all tests must update exactly one node") } - if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, updateCount, wait.ForeverTestTimeout); err != nil { + if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, updateCount, wait.ForeverTestTimeout); err != nil { t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) } @@ -639,7 +611,7 @@ func TestAllocateOrOccupyCIDRFailure(t *testing.T) { testFunc := func(tc testCase) { // Initialize the range allocator. 
- allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, getFakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil) + allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil) if err != nil { t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err) } @@ -648,7 +620,7 @@ func TestAllocateOrOccupyCIDRFailure(t *testing.T) { t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) return } - rangeAllocator.nodesSynced = alwaysReady + rangeAllocator.nodesSynced = test.AlwaysReady rangeAllocator.recorder = testutil.NewFakeRecorder() go allocator.Run(wait.NeverStop) @@ -708,7 +680,7 @@ type releaseTestCase struct { func TestReleaseCIDRSuccess(t *testing.T) { // Non-parallel test (overrides global var) oldNodePollInterval := nodePollInterval - nodePollInterval = testNodePollInterval + nodePollInterval = test.NodePollInterval defer func() { nodePollInterval = oldNodePollInterval }() @@ -784,13 +756,13 @@ func TestReleaseCIDRSuccess(t *testing.T) { testFunc := func(tc releaseTestCase) { // Initialize the range allocator. - allocator, _ := NewCIDRRangeAllocator(tc.fakeNodeHandler, getFakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil) + allocator, _ := NewCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil) rangeAllocator, ok := allocator.(*rangeAllocator) if !ok { t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) return } - rangeAllocator.nodesSynced = alwaysReady + rangeAllocator.nodesSynced = test.AlwaysReady rangeAllocator.recorder = testutil.NewFakeRecorder() go allocator.Run(wait.NeverStop) @@ -813,7 +785,7 @@ func TestReleaseCIDRSuccess(t *testing.T) { if err != nil { t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) } - if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { + if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) } } else { @@ -841,7 +813,7 @@ func TestReleaseCIDRSuccess(t *testing.T) { if err = allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil { t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) } - if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { + if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) } diff --git a/pkg/controller/nodeipam/ipam/test/utils.go b/pkg/controller/nodeipam/ipam/test/utils.go index 42242e1899b..2586484e975 100644 --- a/pkg/controller/nodeipam/ipam/test/utils.go +++ b/pkg/controller/nodeipam/ipam/test/utils.go @@ -18,10 +18,21 @@ package test import ( "net" + "time" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/informers" + coreinformers "k8s.io/client-go/informers/core/v1" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/kubernetes/pkg/controller" + "k8s.io/kubernetes/pkg/controller/testutil" netutils "k8s.io/utils/net" ) +const NodePollInterval = 10 * time.Millisecond + +var AlwaysReady = func() bool { return true } + // MustParseCIDR returns the CIDR range parsed from s or panics if the string // cannot be parsed. 
func MustParseCIDR(s string) *net.IPNet { @@ -31,3 +42,25 @@ func MustParseCIDR(s string) *net.IPNet { } return ret } + +// FakeNodeInformer creates a fakeNodeInformer using the provided fakeNodeHandler. +func FakeNodeInformer(fakeNodeHandler *testutil.FakeNodeHandler) coreinformers.NodeInformer { + fakeClient := &fake.Clientset{} + fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) + fakeNodeInformer := fakeInformerFactory.Core().V1().Nodes() + + for _, node := range fakeNodeHandler.Existing { + fakeNodeInformer.Informer().GetStore().Add(node) + } + + return fakeNodeInformer +} + +func WaitForUpdatedNodeWithTimeout(nodeHandler *testutil.FakeNodeHandler, number int, timeout time.Duration) error { + return wait.Poll(NodePollInterval, timeout, func() (bool, error) { + if len(nodeHandler.GetUpdatedNodesCopy()) >= number { + return true, nil + } + return false, nil + }) +} diff --git a/pkg/controller/nodeipam/node_ipam_controller.go b/pkg/controller/nodeipam/node_ipam_controller.go index 3a3c8ce1919..2cae14bf532 100644 --- a/pkg/controller/nodeipam/node_ipam_controller.go +++ b/pkg/controller/nodeipam/node_ipam_controller.go @@ -20,20 +20,18 @@ import ( "net" "time" - "k8s.io/klog/v2" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - + coreinformers "k8s.io/client-go/informers/core/v1" + networkinginformers "k8s.io/client-go/informers/networking/v1alpha1" + clientset "k8s.io/client-go/kubernetes" v1core "k8s.io/client-go/kubernetes/typed/core/v1" + corelisters "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" - - coreinformers "k8s.io/client-go/informers/core/v1" - clientset "k8s.io/client-go/kubernetes" - corelisters "k8s.io/client-go/listers/core/v1" cloudprovider "k8s.io/cloud-provider" controllersmetrics "k8s.io/component-base/metrics/prometheus/controllers" "k8s.io/component-base/metrics/prometheus/ratelimiter" + "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/controller/nodeipam/ipam" ) @@ -74,6 +72,7 @@ type Controller struct { // currently, this should be handled as a fatal error. 
func NewNodeIpamController( nodeInformer coreinformers.NodeInformer, + clusterCIDRInformer networkinginformers.ClusterCIDRInformer, cloud cloudprovider.Interface, kubeClient clientset.Interface, clusterCIDRs []*net.IPNet, @@ -136,7 +135,7 @@ func NewNodeIpamController( NodeCIDRMaskSizes: nodeCIDRMaskSizes, } - ic.cidrAllocator, err = ipam.New(kubeClient, cloud, nodeInformer, ic.allocatorType, allocatorParams) + ic.cidrAllocator, err = ipam.New(kubeClient, cloud, nodeInformer, clusterCIDRInformer, ic.allocatorType, allocatorParams) if err != nil { return nil, err } diff --git a/pkg/controller/nodeipam/node_ipam_controller_test.go b/pkg/controller/nodeipam/node_ipam_controller_test.go index 48e850b9e78..ad4b433789f 100644 --- a/pkg/controller/nodeipam/node_ipam_controller_test.go +++ b/pkg/controller/nodeipam/node_ipam_controller_test.go @@ -48,6 +48,7 @@ func newTestNodeIpamController(clusterCIDR []*net.IPNet, serviceCIDR *net.IPNet, fakeClient := &fake.Clientset{} fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) fakeNodeInformer := fakeInformerFactory.Core().V1().Nodes() + fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs() for _, node := range fakeNodeHandler.Existing { fakeNodeInformer.Informer().GetStore().Add(node) @@ -55,7 +56,7 @@ func newTestNodeIpamController(clusterCIDR []*net.IPNet, serviceCIDR *net.IPNet, fakeGCE := gce.NewFakeGCECloud(gce.DefaultTestClusterValues()) return NewNodeIpamController( - fakeNodeInformer, fakeGCE, clientSet, + fakeNodeInformer, fakeClusterCIDRInformer, fakeGCE, clientSet, clusterCIDR, serviceCIDR, secondaryServiceCIDR, nodeCIDRMaskSizes, allocatorType, ) } @@ -78,6 +79,9 @@ func TestNewNodeIpamControllerWithCIDRMasks(t *testing.T) { {"valid_range_allocator_dualstack", "10.0.0.0/21,2000::/10", "10.1.0.0/21", emptyServiceCIDR, []int{24, 98}, ipam.RangeAllocatorType, false}, {"valid_range_allocator_dualstack_dualstackservice", "10.0.0.0/21,2000::/10", "10.1.0.0/21", "3000::/10", []int{24, 98}, ipam.RangeAllocatorType, false}, + {"valid_multi_cidr_range_allocator", "10.0.0.0/21", "10.1.0.0/21", emptyServiceCIDR, []int{24}, ipam.MultiCIDRRangeAllocatorType, false}, + {"valid_multi_cidr_range_allocator_dualstack", "10.0.0.0/21,2000::/10", "10.1.0.0/21", emptyServiceCIDR, []int{24, 98}, ipam.MultiCIDRRangeAllocatorType, false}, + {"valid_cloud_allocator", "10.0.0.0/21", "10.1.0.0/21", emptyServiceCIDR, []int{24}, ipam.CloudAllocatorType, false}, {"valid_ipam_from_cluster", "10.0.0.0/21", "10.1.0.0/21", emptyServiceCIDR, []int{24}, ipam.IPAMFromClusterAllocatorType, false}, {"valid_ipam_from_cloud", "10.0.0.0/21", "10.1.0.0/21", emptyServiceCIDR, []int{24}, ipam.IPAMFromCloudAllocatorType, false}, diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 5680afd02f8..24bdc1c1f6c 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -569,6 +569,13 @@ const ( // Enables the usage of different protocols in the same Service with type=LoadBalancer MixedProtocolLBService featuregate.Feature = "MixedProtocolLBService" + // owner: @sarveshr7 + // kep: http://kep.k8s.io/2593 + // alpha: v1.25 + // + // Enables the MultiCIDR Range allocator. 
+ MultiCIDRRangeAllocator featuregate.Feature = "MultiCIDRRangeAllocator" + // owner: @rikatz // kep: http://kep.k8s.io/2079 // alpha: v1.21 @@ -997,6 +1004,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS MixedProtocolLBService: {Default: true, PreRelease: featuregate.Beta}, + MultiCIDRRangeAllocator: {Default: false, PreRelease: featuregate.Alpha}, + NetworkPolicyEndPort: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.27 NetworkPolicyStatus: {Default: false, PreRelease: featuregate.Alpha}, diff --git a/test/integration/ipamperf/ipam_test.go b/test/integration/ipamperf/ipam_test.go index 90e931358b5..a0f35627978 100644 --- a/test/integration/ipamperf/ipam_test.go +++ b/test/integration/ipamperf/ipam_test.go @@ -50,8 +50,10 @@ func setupAllocator(kubeConfig *restclient.Config, config *Config, clusterCIDR, sharedInformer := informers.NewSharedInformerFactory(clientSet, 1*time.Hour) ipamController, err := nodeipam.NewNodeIpamController( - sharedInformer.Core().V1().Nodes(), config.Cloud, clientSet, - []*net.IPNet{clusterCIDR}, serviceCIDR, nil, []int{subnetMaskSize}, config.AllocatorType, + sharedInformer.Core().V1().Nodes(), + sharedInformer.Networking().V1alpha1().ClusterCIDRs(), + config.Cloud, clientSet, []*net.IPNet{clusterCIDR}, serviceCIDR, nil, + []int{subnetMaskSize}, config.AllocatorType, ) if err != nil { return nil, shutdownFunc, err
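
Since MultiCIDRRangeAllocator lands as an alpha, default-off gate, tests that exercise the new allocator type generally have to flip the gate first. A hedged sketch of that setup, assuming the k8s.io/component-base/featuregate/testing helper available in this release (which returns a restore function to defer); the test name and body are placeholders:

package ipam_test

import (
	"testing"

	utilfeature "k8s.io/apiserver/pkg/util/feature"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	"k8s.io/kubernetes/pkg/features"
)

func TestWithMultiCIDRGateEnabled(t *testing.T) {
	// Enable the alpha gate for the duration of this test and restore it afterwards.
	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.MultiCIDRRangeAllocator, true)()

	// ... construct the node IPAM controller with ipam.MultiCIDRRangeAllocatorType here ...
}
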