From 02d944d046fd53a702f51aeb0e86ce56c456beee Mon Sep 17 00:00:00 2001 From: Sarvesh Rangnekar Date: Sun, 31 Jul 2022 03:52:28 +0000 Subject: [PATCH] Add a priority queue to implement MultiCIDR tie-breaks The Priority is determined as follows: P0: ClusterCIDR with higher number of matching labels has highest priority. P1: ClusterCIDR having cidrSet with fewer allocatable Pod CIDRs has higher priority. P2: ClusterCIDR with a PerNodeMaskSize having fewer IPs has higher priority. P3: ClusterCIDR having label with lower alphanumeric value has higher priority. P4: ClusterCIDR with a cidrSet having a smaller IP address value has higher priority. --- .../ipam/multi_cidr_priority_queue.go | 140 +++++++++++++++ .../ipam/multi_cidr_priority_queue_test.go | 170 ++++++++++++++++++ 2 files changed, 310 insertions(+) create mode 100644 pkg/controller/nodeipam/ipam/multi_cidr_priority_queue.go create mode 100644 pkg/controller/nodeipam/ipam/multi_cidr_priority_queue_test.go diff --git a/pkg/controller/nodeipam/ipam/multi_cidr_priority_queue.go b/pkg/controller/nodeipam/ipam/multi_cidr_priority_queue.go new file mode 100644 index 00000000000..1c3eedc7d17 --- /dev/null +++ b/pkg/controller/nodeipam/ipam/multi_cidr_priority_queue.go @@ -0,0 +1,140 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package ipam
+
+import (
+	"math"
+
+	cidrset "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/multicidrset"
+)
+
+// A PriorityQueue implementation based on https://pkg.go.dev/container/heap#example-package-PriorityQueue
+
+// PriorityQueueItem is one entry managed by the priority queue.
+type PriorityQueueItem struct {
+	// clusterCIDR is the ClusterCIDR whose IPv4/IPv6 CIDR sets feed the
+	// tie-break rules in Less.
+	clusterCIDR *cidrset.ClusterCIDR
+	// labelMatchCount is the first value compared when ranking two items.
+	labelMatchCount int
+	// selectorString is the string form of the label selector associated with the cidrSet.
+	selectorString string
+	// index is the position of the item in the heap. Swap, Push and Pop keep
+	// it up to date.
+	index int
+}
+
+// PriorityQueue is a slice of PriorityQueueItems that implements heap.Interface.
+type PriorityQueue []*PriorityQueueItem
+
+// Len reports the number of items currently in the queue.
+func (pq PriorityQueue) Len() int {
+	return len(pq)
+}
+
+// Less reports whether item i ranks ahead of item j. The queue is stored as a
+// min heap, so a true result means i has the higher priority.
+//
+// A rule is consulted only when every rule before it is tied:
+//
+//	P0: more matching labels wins.
+//	P1: fewer allocatable pod CIDRs wins.
+//	P2: a PerNodeMaskSize with fewer IPs wins, e.g. `27` (32 IPs) before `25` (128 IPs).
+//	P3: the alphanumerically lower selector string wins.
+//	P4: the alphanumerically lower CIDR label wins.
+func (pq PriorityQueue) Less(i, j int) bool {
+	a, b := pq[i], pq[j]
+
+	// P0
+	if a.labelMatchCount != b.labelMatchCount {
+		return a.labelMatchCount > b.labelMatchCount
+	}
+	// P1
+	if x, y := a.maxAllocatable(), b.maxAllocatable(); x != y {
+		return x < y
+	}
+	// P2
+	if x, y := a.nodeMaskSize(), b.nodeMaskSize(); x != y {
+		return x > y
+	}
+	// P3
+	if a.selectorString != b.selectorString {
+		return a.selectorString < b.selectorString
+	}
+	// P4
+	return a.cidrLabel() < b.cidrLabel()
+}
+
+// Swap exchanges the items at positions i and j and updates the index stored
+// on each item to match its new position.
+func (pq PriorityQueue) Swap(i, j int) {
+	pq[i], pq[j] = pq[j], pq[i]
+	pq[i].index, pq[j].index = i, j
+}
+
+// Push appends x to the end of the queue and records its position on the item.
+// A value that is not a *PriorityQueueItem is silently ignored.
+func (pq *PriorityQueue) Push(x interface{}) {
+	item, ok := x.(*PriorityQueueItem)
+	if !ok {
+		return
+	}
+
+	item.index = len(*pq)
+	*pq = append(*pq, item)
+}
+
+// Pop removes the last element of the underlying slice and returns it.
+func (pq *PriorityQueue) Pop() interface{} {
+	queue := *pq
+	last := len(queue) - 1
+
+	item := queue[last]
+	// Clear the vacated slot so the backing array no longer references the item.
+	queue[last] = nil
+	// Mark the item as no longer being part of the heap.
+	item.index = -1
+
+	*pq = queue[:last]
+	return item
+}
+
+// maxAllocatable returns the smaller MaxCIDRs value across the item's IPv4 and
+// IPv6 CIDR sets. A family without a CIDR set is ignored; when neither family
+// has one the result is math.MaxInt.
+//
+// Example:
+//
+//	IPv4 - 10.0.0.0/16  PerNodeMaskSize: 24   MaxCIDRs = 256
+//	IPv6 - ff:ff::/120  PerNodeMaskSize: 120  MaxCIDRs = 1
+//	maxAllocatable for this ClusterCIDR = 1
+func (pqi *PriorityQueueItem) maxAllocatable() int {
+	smallest := math.MaxInt
+
+	if v4 := pqi.clusterCIDR.IPv4CIDRSet; v4 != nil {
+		smallest = v4.MaxCIDRs
+	}
+	if v6 := pqi.clusterCIDR.IPv6CIDRSet; v6 != nil && v6.MaxCIDRs < smallest {
+		smallest = v6.MaxCIDRs
+	}
+
+	return smallest
+}
+
+// nodeMaskSize returns IPv4 NodeMaskSize if present, else returns IPv6 NodeMaskSize.
+// Note the requirement: 32 - IPv4 NodeMaskSize == 128 - IPv6 NodeMaskSize
+// Due to the above requirement it does not matter which NodeMaskSize we compare.
+func (pqi *PriorityQueueItem) nodeMaskSize() int {
+	if v4 := pqi.clusterCIDR.IPv4CIDRSet; v4 != nil {
+		return v4.NodeMaskSize
+	}
+	if v6 := pqi.clusterCIDR.IPv6CIDRSet; v6 != nil {
+		// NOTE(review): Less compares this result directly with the other item's.
+		// If one item is IPv4-only and the other IPv6-only, the two mask sizes are
+		// on different scales (out of 32 vs. out of 128) -- confirm that such
+		// items never need to be ranked against each other.
+		return v6.NodeMaskSize
+	}
+
+	// Neither family has a CIDR set. maxAllocatable tolerates that state (it
+	// returns math.MaxInt), so Less can reach this call for two such items;
+	// return 0 rather than dereferencing a nil CIDR set.
+	return 0
+}
+
+// cidrLabel returns the Label of the IPv4 CIDR set if present, else the Label
+// of the IPv6 CIDR set. It returns "" when the item has neither CIDR set.
+func (pqi *PriorityQueueItem) cidrLabel() string {
+	if v4 := pqi.clusterCIDR.IPv4CIDRSet; v4 != nil {
+		return v4.Label
+	}
+	if v6 := pqi.clusterCIDR.IPv6CIDRSet; v6 != nil {
+		return v6.Label
+	}
+
+	// No CIDR set at all: report an empty label instead of dereferencing nil,
+	// so the final tie-break in Less cannot panic.
+	return ""
+}
diff --git a/pkg/controller/nodeipam/ipam/multi_cidr_priority_queue_test.go b/pkg/controller/nodeipam/ipam/multi_cidr_priority_queue_test.go
new file mode 100644
index 00000000000..357592f6ba1
--- /dev/null
+++ b/pkg/controller/nodeipam/ipam/multi_cidr_priority_queue_test.go
@@ -0,0 +1,170 @@
+/*
+Copyright 2022 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package ipam
+
+import (
+	"container/heap"
+	"testing"
+
+	"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/multicidrset"
+	utilnet "k8s.io/utils/net"
+)
+
+// createTestPriorityQueueItem builds a PriorityQueueItem whose ClusterCIDR has
+// the given name and a single CIDR set, stored in IPv4CIDRSet, created from
+// cidr and perNodeHostBits. It panics if cidr cannot be parsed or the CIDR set
+// cannot be created, so a bad fixture fails at construction instead of
+// yielding an item with a nil CIDR set.
+func createTestPriorityQueueItem(name, cidr, selectorString string, labelMatchCount, perNodeHostBits int) *PriorityQueueItem {
+	_, clusterCIDR, err := utilnet.ParseCIDRSloppy(cidr)
+	if err != nil {
+		panic("parsing test CIDR " + cidr + ": " + err.Error())
+	}
+
+	cidrSet, err := multicidrset.NewMultiCIDRSet(clusterCIDR, perNodeHostBits)
+	if err != nil {
+		panic("creating CIDR set for " + cidr + ": " + err.Error())
+	}
+
+	return &PriorityQueueItem{
+		clusterCIDR: &multicidrset.ClusterCIDR{
+			Name:        name,
+			IPv4CIDRSet: cidrSet,
+		},
+		labelMatchCount: labelMatchCount,
+		selectorString:  selectorString,
+	}
+}
+
+// TestPriorityQueue pushes each set of items onto a fresh queue and checks
+// that the first item popped is the expected highest-priority one.
+func TestPriorityQueue(t *testing.T) {
+	pqi1 := createTestPriorityQueueItem("cidr1", "192.168.0.0/16", "foo=bar,name=test1", 1, 8)
+	pqi2 := createTestPriorityQueueItem("cidr2", "10.1.0.0/24", "foo=bar,name=test2", 2, 8)
+	pqi3 := createTestPriorityQueueItem("cidr3", "172.16.0.0/16", "foo=bar,name=test3", 2, 8)
+	pqi4 := createTestPriorityQueueItem("cidr4", "10.1.1.0/26", "abc=bar,name=test4", 2, 6)
+	pqi5 := createTestPriorityQueueItem("cidr5", "10.1.2.0/26", "foo=bar,name=test5", 2, 6)
+	pqi6 := createTestPriorityQueueItem("cidr6", "10.1.3.0/26", "abc=bar,name=test4", 2, 6)
+
+	for _, tc := range []struct {
+		name  string
+		items []*PriorityQueueItem
+		want  *PriorityQueueItem
+	}{
+		{"Test queue with single item", []*PriorityQueueItem{pqi1}, pqi1},
+		{"Test queue with items having different labelMatchCount", []*PriorityQueueItem{pqi1, pqi2}, pqi2},
+		{"Test queue with items having same labelMatchCount, different max Allocatable Pod CIDRs", []*PriorityQueueItem{pqi1, pqi2, pqi3}, pqi2},
+		{"Test queue with items having same labelMatchCount, max Allocatable Pod CIDRs, different PerNodeMaskSize", []*PriorityQueueItem{pqi1, pqi2, pqi4}, pqi4},
+		{"Test queue with items having same labelMatchCount, max Allocatable Pod CIDRs, PerNodeMaskSize, different labels", []*PriorityQueueItem{pqi1, pqi2, pqi4, pqi5}, pqi4},
+		{"Test queue with items having same labelMatchCount, max Allocatable Pod CIDRs, PerNodeMaskSize, labels, different IP addresses", []*PriorityQueueItem{pqi1, pqi2, pqi4, pqi5, pqi6}, pqi4},
+	} {
+		// Run each case as a subtest so a failure is reported under its name.
+		t.Run(tc.name, func(t *testing.T) {
+			pq := make(PriorityQueue, 0, len(tc.items))
+			for _, pqi := range tc.items {
+				heap.Push(&pq, pqi)
+			}
+
+			got := heap.Pop(&pq)
+
+			if got != tc.want {
+				t.Errorf("Error, wanted: %+v, got: %+v", tc.want, got)
+			}
+		})
+	}
+}
+
+// TestLess calls Less(0, 1) on two-item queues, with one "i higher" and one
+// "i lower" case for each tie-break rule.
+func TestLess(t *testing.T) {
+	for _, tc := range []struct {
+		name  string
+		items []*PriorityQueueItem
+		want  bool
+	}{
+		{
+			name: "different labelMatchCount, i higher priority than j",
+			items: []*PriorityQueueItem{
+				createTestPriorityQueueItem("cidr1", "192.168.0.0/16", "foo=bar,name=test1", 2, 8),
+				createTestPriorityQueueItem("cidr2", "10.1.0.0/24", "foo=bar,name=test2", 1, 8),
+			},
+			want: true,
+		},
+		{
+			name: "different labelMatchCount, i lower priority than j",
+			items: []*PriorityQueueItem{
+				createTestPriorityQueueItem("cidr1", "192.168.0.0/16", "foo=bar,name=test1", 1, 8),
+				createTestPriorityQueueItem("cidr2", "10.1.0.0/24", "foo=bar,name=test2", 2, 8),
+			},
+			want: false,
+		},
+		{
+			name: "same labelMatchCount, different max allocatable cidrs, i higher priority than j",
+			items: []*PriorityQueueItem{
+				createTestPriorityQueueItem("cidr2", "10.1.0.0/24", "foo=bar,name=test2", 2, 8),
+				createTestPriorityQueueItem("cidr3", "172.16.0.0/16", "foo=bar,name=test3", 2, 8),
+			},
+			want: true,
+		},
+		{
+			name: "same labelMatchCount, different max allocatable cidrs, i lower priority than j",
+			items: []*PriorityQueueItem{
+				createTestPriorityQueueItem("cidr2", "10.1.0.0/16", "foo=bar,name=test2", 2, 8),
+				createTestPriorityQueueItem("cidr3", "172.16.0.0/24", "foo=bar,name=test3", 2, 8),
+			},
+			want: false,
+		},
+		{
+			name: "same labelMatchCount, max allocatable cidrs, different PerNodeMaskSize i higher priority than j",
+			items: []*PriorityQueueItem{
+				createTestPriorityQueueItem("cidr2", "10.1.0.0/26", "foo=bar,name=test2", 2, 6),
+				createTestPriorityQueueItem("cidr4", "10.1.1.0/24", "abc=bar,name=test4", 2, 8),
+			},
+			want: true,
+		},
+		{
+			name: "same labelMatchCount, max allocatable cidrs, different PerNodeMaskSize i lower priority than j",
+			items: []*PriorityQueueItem{
+				createTestPriorityQueueItem("cidr2", "10.1.0.0/24", "foo=bar,name=test2", 2, 8),
+				createTestPriorityQueueItem("cidr4", "10.1.1.0/26", "abc=bar,name=test4", 2, 6),
+			},
+			want: false,
+		},
+		{
+			name: "same labelMatchCount, max Allocatable Pod CIDRs, PerNodeMaskSize, different labels i higher priority than j",
+			items: []*PriorityQueueItem{
+				createTestPriorityQueueItem("cidr4", "10.1.1.0/26", "abc=bar,name=test4", 2, 6),
+				createTestPriorityQueueItem("cidr5", "10.1.2.0/26", "foo=bar,name=test5", 2, 6),
+			},
+			want: true,
+		},
+		{
+			name: "same labelMatchCount, max Allocatable Pod CIDRs, PerNodeMaskSize, different labels i lower priority than j",
+			items: []*PriorityQueueItem{
+				createTestPriorityQueueItem("cidr4", "10.1.1.0/26", "xyz=bar,name=test4", 2, 6),
+				createTestPriorityQueueItem("cidr5", "10.1.2.0/26", "foo=bar,name=test5", 2, 6),
+			},
+			want: false,
+		},
+		{
+			name: "same labelMatchCount, max Allocatable Pod CIDRs, PerNodeMaskSize, labels, different IP addresses i higher priority than j",
+			items: []*PriorityQueueItem{
+				createTestPriorityQueueItem("cidr4", "10.1.1.0/26", "abc=bar,name=test4", 2, 6),
+				createTestPriorityQueueItem("cidr6", "10.1.3.0/26", "abc=bar,name=test4", 2, 6),
+			},
+			want: true,
+		},
+		{
+			name: "same labelMatchCount, max Allocatable Pod CIDRs, PerNodeMaskSize, labels, different IP addresses i lower priority than j",
+			items: []*PriorityQueueItem{
+				// Both items must share the same selector string; otherwise the
+				// selector comparison decides the result and the CIDR comparison
+				// this case is named after is never reached.
+				createTestPriorityQueueItem("cidr4", "10.1.1.0/26", "abc=bar,name=test4", 2, 6),
+				createTestPriorityQueueItem("cidr6", "10.0.3.0/26", "abc=bar,name=test4", 2, 6),
+			},
+			want: false,
+		},
+	} {
+		// Run each case as a subtest so a failure is reported under its name.
+		t.Run(tc.name, func(t *testing.T) {
+			pq := PriorityQueue(tc.items)
+			got := pq.Less(0, 1)
+			if got != tc.want {
+				t.Errorf("Error, wanted: %v, got: %v\nTest %q \npq[0]: %+v \npq[1]: %+v ", tc.want, got, tc.name, pq[0], pq[1])
+			}
+		})
+	}
+}