diff --git a/pkg/scheduler/algorithm/predicates/metadata.go b/pkg/scheduler/algorithm/predicates/metadata.go index 095d80f29d5..48fa06bb090 100644 --- a/pkg/scheduler/algorithm/predicates/metadata.go +++ b/pkg/scheduler/algorithm/predicates/metadata.go @@ -34,6 +34,9 @@ import ( schedutil "k8s.io/kubernetes/pkg/scheduler/util" ) +// MaxInt32 is the maximum value of int32 +const MaxInt32 = int32(^uint32(0) >> 1) + // PredicateMetadata interface represents anything that can access a predicate metadata. type PredicateMetadata interface { ShallowCopy() PredicateMetadata @@ -66,6 +69,21 @@ type topologyPairsMaps struct { podToTopologyPairs map[string]topologyPairSet } +// topologyPairsPodSpreadMap combines []int32 and topologyPairsMaps to represent +// (1) how existing pods match incoming pod on its spread constraints +// (2) minimum match number of each hard spread constraint +type topologyPairsPodSpreadMap struct { + minMatches []int32 + *topologyPairsMaps +} + +func newTopologyPairsPodSpreadMap() *topologyPairsPodSpreadMap { + return &topologyPairsPodSpreadMap{ + // minMatches will be initilized with proper size later + topologyPairsMaps: newTopologyPairsMaps(), + } +} + // NOTE: When new fields are added/removed or logic is changed, please make sure that // RemovePod, AddPod, and ShallowCopy functions are updated to work with the new changes. type predicateMetadata struct { @@ -91,6 +109,9 @@ type predicateMetadata struct { // which should be accounted only by the extenders. This set is synthesized // from scheduler extender configuration and does not change per pod. ignoredExtendedResources sets.String + // Similar like map for pod (anti-)affinity, but impose additional min matches info + // to describe mininum match number on each topology spread constraint + topologyPairsPodSpreadMap *topologyPairsPodSpreadMap } // Ensure that predicateMetadata implements algorithm.PredicateMetadata. @@ -137,17 +158,24 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf if pod == nil { return nil } + // existingPodSpreadConstraintsMap represents how existing pods matches "pod" + // on its spread constraints + existingPodSpreadConstraintsMap, err := getTPMapMatchingSpreadConstraints(pod, nodeNameToInfoMap) + if err != nil { + klog.Errorf("Error calculating spreadConstraintsMap: %v", err) + return nil + } // existingPodAntiAffinityMap will be used later for efficient check on existing pods' anti-affinity existingPodAntiAffinityMap, err := getTPMapMatchingExistingAntiAffinity(pod, nodeNameToInfoMap) if err != nil { - klog.Errorf("[predicate meta data generation] error finding pods whose affinity terms are matched: %v", err) + klog.Errorf("Error calculating existingPodAntiAffinityMap: %v", err) return nil } // incomingPodAffinityMap will be used later for efficient check on incoming pod's affinity // incomingPodAntiAffinityMap will be used later for efficient check on incoming pod's anti-affinity incomingPodAffinityMap, incomingPodAntiAffinityMap, err := getTPMapMatchingIncomingAffinityAntiAffinity(pod, nodeNameToInfoMap) if err != nil { - klog.Errorf("[predicate meta data generation] error finding pods that match affinity terms: %v", err) + klog.Errorf("Error calculating incomingPod(Anti)AffinityMap: %v", err) return nil } predicateMetadata := &predicateMetadata{ @@ -158,6 +186,7 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf topologyPairsPotentialAffinityPods: incomingPodAffinityMap, topologyPairsPotentialAntiAffinityPods: incomingPodAntiAffinityMap, topologyPairsAntiAffinityPodsMap: existingPodAntiAffinityMap, + topologyPairsPodSpreadMap: existingPodSpreadConstraintsMap, } for predicateName, precomputeFunc := range predicateMetadataProducers { klog.V(10).Infof("Precompute: %v", predicateName) @@ -166,46 +195,224 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf return predicateMetadata } +func getTPMapMatchingSpreadConstraints(pod *v1.Pod, nodeInfoMap map[string]*schedulernodeinfo.NodeInfo) (*topologyPairsPodSpreadMap, error) { + // we have feature gating in APIserver to strip the spec + // so don't need to re-check feature gate, just check length of constraints + constraints := getHardTopologySpreadConstraints(pod) + if len(constraints) == 0 { + return nil, nil + } + + allNodeNames := make([]string, 0, len(nodeInfoMap)) + for name := range nodeInfoMap { + allNodeNames = append(allNodeNames, name) + } + + var lock sync.Mutex + var firstError error + + topologyPairsPodSpreadMap := newTopologyPairsPodSpreadMap() + + appendTopologyPairsMaps := func(toAppend *topologyPairsMaps) { + lock.Lock() + topologyPairsPodSpreadMap.appendMaps(toAppend) + lock.Unlock() + } + catchError := func(err error) { + lock.Lock() + if firstError == nil { + firstError = err + } + lock.Unlock() + } + + ctx, cancel := context.WithCancel(context.Background()) + + processNode := func(i int) { + nodeInfo := nodeInfoMap[allNodeNames[i]] + node := nodeInfo.Node() + if node == nil { + catchError(fmt.Errorf("node %q not found", allNodeNames[i])) + cancel() + return + } + // Be design if NodeAffinity or NodeSelector is defined, spreading is + // applied to nodes that pass those filters. + if !podMatchesNodeSelectorAndAffinityTerms(pod, node) { + return + } + // ensure current node's labels contains all topologyKeys in 'constraints' + for _, constraint := range constraints { + if _, ok := node.Labels[constraint.TopologyKey]; !ok { + return + } + } + + nodeTopologyMaps := newTopologyPairsMaps() + // nodeInfo.Pods() can be empty; or all pods don't fit + for _, existingPod := range nodeInfo.Pods() { + ok, err := podMatchesAllSpreadConstraints(existingPod, pod.Namespace, constraints) + if err != nil { + catchError(err) + cancel() + return + } + if ok { + for _, constraint := range constraints { + // constraint.TopologyKey is already guaranteed to be present + pair := topologyPair{key: constraint.TopologyKey, value: node.Labels[constraint.TopologyKey]} + nodeTopologyMaps.addTopologyPair(pair, existingPod) + } + } + } + // If needed, append topology pair without entry of pods. + // For example, on node-x, there is no pod matching spread constraints + // but node-x should be also considered as a match (with match number 0) + // i.e. : {} + for _, constraint := range constraints { + // constraint.TopologyKey is already guaranteed to be present + pair := topologyPair{ + key: constraint.TopologyKey, + value: node.Labels[constraint.TopologyKey], + } + // addTopologyPairWithoutPods is a non-op if other pods match this pair + nodeTopologyMaps.addTopologyPairWithoutPods(pair) + } + + appendTopologyPairsMaps(nodeTopologyMaps) + } + workqueue.ParallelizeUntil(ctx, 16, len(allNodeNames), processNode) + + if firstError != nil { + return nil, firstError + } + + // calculate min match for each topology pair + topologyPairsPodSpreadMap.minMatches = make([]int32, len(constraints)) + tpKeyIdx := make(map[string]int) + for i, constraint := range constraints { + tpKeyIdx[constraint.TopologyKey] = i + topologyPairsPodSpreadMap.minMatches[i] = MaxInt32 + } + for pair, podSet := range topologyPairsPodSpreadMap.topologyPairToPods { + idx := tpKeyIdx[pair.key] + // short circuit if we see 0 as min match of the topologyKey + if topologyPairsPodSpreadMap.minMatches[idx] == 0 { + continue + } + if l := int32(len(podSet)); l < topologyPairsPodSpreadMap.minMatches[idx] { + topologyPairsPodSpreadMap.minMatches[idx] = l + } + } + return topologyPairsPodSpreadMap, nil +} + +func getHardTopologySpreadConstraints(pod *v1.Pod) (constraints []v1.TopologySpreadConstraint) { + if pod != nil { + for _, constraint := range pod.Spec.TopologySpreadConstraints { + if constraint.WhenUnsatisfiable == v1.DoNotSchedule { + constraints = append(constraints, constraint) + } + } + } + return +} + +func podMatchesAllSpreadConstraints(pod *v1.Pod, ns string, constraints []v1.TopologySpreadConstraint) (bool, error) { + if len(constraints) == 0 || pod.Namespace != ns { + return false, nil + } + return podLabelsMatchesSpreadConstraints(pod.Labels, constraints) +} + +// some corner cases: +// 1. podLabels = nil, constraint.LabelSelector = nil => returns false +// 2. podLabels = nil => returns false +// 3. constraint.LabelSelector = nil => returns false +func podLabelsMatchesSpreadConstraints(podLabels map[string]string, constraints []v1.TopologySpreadConstraint) (bool, error) { + if len(constraints) == 0 { + return false, nil + } + for _, constraint := range constraints { + selector, err := metav1.LabelSelectorAsSelector(constraint.LabelSelector) + if err != nil { + return false, err + } + if !selector.Matches(labels.Set(podLabels)) { + return false, nil + } + } + return true, nil +} + // returns a pointer to a new topologyPairsMaps func newTopologyPairsMaps() *topologyPairsMaps { return &topologyPairsMaps{topologyPairToPods: make(map[topologyPair]podSet), podToTopologyPairs: make(map[string]topologyPairSet)} } -func (topologyPairsMaps *topologyPairsMaps) addTopologyPair(pair topologyPair, pod *v1.Pod) { +func (m *topologyPairsMaps) addTopologyPair(pair topologyPair, pod *v1.Pod) { podFullName := schedutil.GetPodFullName(pod) - if topologyPairsMaps.topologyPairToPods[pair] == nil { - topologyPairsMaps.topologyPairToPods[pair] = make(map[*v1.Pod]struct{}) + if m.topologyPairToPods[pair] == nil { + m.topologyPairToPods[pair] = make(map[*v1.Pod]struct{}) } - topologyPairsMaps.topologyPairToPods[pair][pod] = struct{}{} - if topologyPairsMaps.podToTopologyPairs[podFullName] == nil { - topologyPairsMaps.podToTopologyPairs[podFullName] = make(map[topologyPair]struct{}) + m.topologyPairToPods[pair][pod] = struct{}{} + if m.podToTopologyPairs[podFullName] == nil { + m.podToTopologyPairs[podFullName] = make(map[topologyPair]struct{}) } - topologyPairsMaps.podToTopologyPairs[podFullName][pair] = struct{}{} + m.podToTopologyPairs[podFullName][pair] = struct{}{} } -func (topologyPairsMaps *topologyPairsMaps) removePod(deletedPod *v1.Pod) { +// add a topology pair holder if needed +func (m *topologyPairsMaps) addTopologyPairWithoutPods(pair topologyPair) { + if m.topologyPairToPods[pair] == nil { + m.topologyPairToPods[pair] = make(map[*v1.Pod]struct{}) + } +} + +func (m *topologyPairsMaps) removePod(deletedPod *v1.Pod) { deletedPodFullName := schedutil.GetPodFullName(deletedPod) - for pair := range topologyPairsMaps.podToTopologyPairs[deletedPodFullName] { - delete(topologyPairsMaps.topologyPairToPods[pair], deletedPod) - if len(topologyPairsMaps.topologyPairToPods[pair]) == 0 { - delete(topologyPairsMaps.topologyPairToPods, pair) + for pair := range m.podToTopologyPairs[deletedPodFullName] { + delete(m.topologyPairToPods[pair], deletedPod) + if len(m.topologyPairToPods[pair]) == 0 { + delete(m.topologyPairToPods, pair) } } - delete(topologyPairsMaps.podToTopologyPairs, deletedPodFullName) + delete(m.podToTopologyPairs, deletedPodFullName) } -func (topologyPairsMaps *topologyPairsMaps) appendMaps(toAppend *topologyPairsMaps) { +func (m *topologyPairsMaps) appendMaps(toAppend *topologyPairsMaps) { if toAppend == nil { return } for pair := range toAppend.topologyPairToPods { - for pod := range toAppend.topologyPairToPods[pair] { - topologyPairsMaps.addTopologyPair(pair, pod) + if podSet := toAppend.topologyPairToPods[pair]; len(podSet) == 0 { + m.addTopologyPairWithoutPods(pair) + } else { + for pod := range podSet { + m.addTopologyPair(pair, pod) + } } } } +func (m *topologyPairsMaps) clone() *topologyPairsMaps { + copy := newTopologyPairsMaps() + copy.appendMaps(m) + return copy +} + +func (podSpreadMap *topologyPairsPodSpreadMap) clone() *topologyPairsPodSpreadMap { + // podSpreadMap could be nil when EvenPodsSpread feature is disabled + if podSpreadMap == nil { + return nil + } + copy := newTopologyPairsPodSpreadMap() + copy.minMatches = append([]int32(nil), podSpreadMap.minMatches...) + copy.topologyPairsMaps.appendMaps(podSpreadMap.topologyPairsMaps) + return copy +} + // RemovePod changes predicateMetadata assuming that the given `deletedPod` is // deleted from the system. func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod) error { @@ -301,12 +508,10 @@ func (meta *predicateMetadata) ShallowCopy() PredicateMetadata { ignoredExtendedResources: meta.ignoredExtendedResources, } newPredMeta.podPorts = append([]*v1.ContainerPort(nil), meta.podPorts...) - newPredMeta.topologyPairsPotentialAffinityPods = newTopologyPairsMaps() - newPredMeta.topologyPairsPotentialAffinityPods.appendMaps(meta.topologyPairsPotentialAffinityPods) - newPredMeta.topologyPairsPotentialAntiAffinityPods = newTopologyPairsMaps() - newPredMeta.topologyPairsPotentialAntiAffinityPods.appendMaps(meta.topologyPairsPotentialAntiAffinityPods) - newPredMeta.topologyPairsAntiAffinityPodsMap = newTopologyPairsMaps() - newPredMeta.topologyPairsAntiAffinityPodsMap.appendMaps(meta.topologyPairsAntiAffinityPodsMap) + newPredMeta.topologyPairsPotentialAffinityPods = meta.topologyPairsPotentialAffinityPods.clone() + newPredMeta.topologyPairsPotentialAntiAffinityPods = meta.topologyPairsPotentialAntiAffinityPods.clone() + newPredMeta.topologyPairsAntiAffinityPodsMap = meta.topologyPairsAntiAffinityPodsMap.clone() + newPredMeta.topologyPairsPodSpreadMap = meta.topologyPairsPodSpreadMap.clone() newPredMeta.serviceAffinityMatchingPodServices = append([]*v1.Service(nil), meta.serviceAffinityMatchingPodServices...) newPredMeta.serviceAffinityMatchingPodList = append([]*v1.Pod(nil), diff --git a/pkg/scheduler/algorithm/predicates/metadata_test.go b/pkg/scheduler/algorithm/predicates/metadata_test.go index 3ab656dc30a..c29f8b10670 100644 --- a/pkg/scheduler/algorithm/predicates/metadata_test.go +++ b/pkg/scheduler/algorithm/predicates/metadata_test.go @@ -511,6 +511,39 @@ func TestPredicateMetadata_ShallowCopy(t *testing.T) { }, }, }, + topologyPairsPodSpreadMap: &topologyPairsPodSpreadMap{ + minMatches: []int32{1}, + topologyPairsMaps: &topologyPairsMaps{ + topologyPairToPods: map[topologyPair]podSet{ + {key: "name", value: "nodeA"}: { + &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1}, + Spec: v1.PodSpec{NodeName: "nodeA"}, + }: struct{}{}, + }, + {key: "name", value: "nodeC"}: { + &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"}, + Spec: v1.PodSpec{ + NodeName: "nodeC", + }, + }: struct{}{}, + &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1}, + Spec: v1.PodSpec{NodeName: "nodeC"}, + }: struct{}{}, + }, + }, + podToTopologyPairs: map[string]topologyPairSet{ + "p1_": { + topologyPair{key: "name", value: "nodeA"}: struct{}{}, + }, + "p2_": { + topologyPair{key: "name", value: "nodeC"}: struct{}{}, + }, + "p6_": { + topologyPair{key: "name", value: "nodeC"}: struct{}{}, + }, + }, + }, + }, serviceAffinityInUse: true, serviceAffinityMatchingPodList: []*v1.Pod{ {ObjectMeta: metav1.ObjectMeta{Name: "pod1"}}, @@ -791,3 +824,409 @@ func TestGetTPMapMatchingIncomingAffinityAntiAffinity(t *testing.T) { }) } } + +func TestPodLabelsMatchesSpreadConstraints(t *testing.T) { + tests := []struct { + name string + podLabels map[string]string + constraints []v1.TopologySpreadConstraint + want bool + wantErr bool + }{ + { + name: "normal match", + podLabels: map[string]string{"foo": "", "bar": ""}, + constraints: []v1.TopologySpreadConstraint{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + }, + }, + want: true, + }, + { + name: "normal mismatch", + podLabels: map[string]string{"foo": "", "baz": ""}, + constraints: []v1.TopologySpreadConstraint{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + { + Key: "bar", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + }, + }, + want: false, + }, + { + name: "podLabels is nil", + constraints: []v1.TopologySpreadConstraint{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpExists, + }, + }, + }, + }, + }, + want: false, + }, + { + name: "constraint.LabelSelector is nil", + podLabels: map[string]string{ + "foo": "", + "bar": "", + }, + constraints: []v1.TopologySpreadConstraint{ + { + MaxSkew: 1, + }, + }, + want: false, + }, + { + name: "both podLabels and constraint.LabelSelector are nil", + constraints: []v1.TopologySpreadConstraint{ + { + MaxSkew: 1, + }, + }, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := podLabelsMatchesSpreadConstraints(tt.podLabels, tt.constraints) + if (err != nil) != tt.wantErr { + t.Errorf("podLabelsMatchesSpreadConstraints() error = %v, wantErr %v", err, tt.wantErr) + } + if got != tt.want { + t.Errorf("podLabelsMatchesSpreadConstraints() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestGetTPMapMatchingSpreadConstraints(t *testing.T) { + // we need to inject the exact pod pointers to want.topologyPairsMaps.topologyPairToPods + // otherwise, *pod (as key of a map) will always fail in reflect.DeepEqual() + tests := []struct { + name string + pod *v1.Pod + nodes []*v1.Node + existingPods []*v1.Pod + injectPodPointers map[topologyPair][]int + want *topologyPairsPodSpreadMap + }{ + { + name: "clean cluster with one spreadConstraint", + pod: makePod().name("p").label("foo", "").spreadConstraint( + 1, "zone", hardSpread, makeLabelSelector().exists("foo").obj(), + ).obj(), + nodes: []*v1.Node{ + makeNode().name("node-a").label("zone", "zone1").label("node", "node-a").obj(), + makeNode().name("node-b").label("zone", "zone1").label("node", "node-b").obj(), + makeNode().name("node-x").label("zone", "zone2").label("node", "node-x").obj(), + makeNode().name("node-y").label("zone", "zone2").label("node", "node-y").obj(), + }, + injectPodPointers: map[topologyPair][]int{ + // denotes no existing pod is matched on this zone pair, but still needed to be + // calculated if incoming pod matches its own spread constraints + {key: "zone", value: "zone1"}: []int{}, + {key: "zone", value: "zone2"}: []int{}, + }, + want: &topologyPairsPodSpreadMap{ + minMatches: []int32{0}, + topologyPairsMaps: &topologyPairsMaps{ + podToTopologyPairs: make(map[string]topologyPairSet), + }, + }, + }, + { + name: "normal case with one spreadConstraint", + pod: makePod().name("p").label("foo", "").spreadConstraint( + 1, "zone", hardSpread, makeLabelSelector().exists("foo").obj(), + ).obj(), + nodes: []*v1.Node{ + makeNode().name("node-a").label("zone", "zone1").label("node", "node-a").obj(), + makeNode().name("node-b").label("zone", "zone1").label("node", "node-b").obj(), + makeNode().name("node-x").label("zone", "zone2").label("node", "node-x").obj(), + makeNode().name("node-y").label("zone", "zone2").label("node", "node-y").obj(), + }, + existingPods: []*v1.Pod{ + makePod().name("p-a1").node("node-a").label("foo", "").obj(), + makePod().name("p-a2").node("node-a").label("foo", "").obj(), + makePod().name("p-b1").node("node-b").label("foo", "").obj(), + makePod().name("p-y1").node("node-y").label("foo", "").obj(), + makePod().name("p-y2").node("node-y").label("foo", "").obj(), + }, + injectPodPointers: map[topologyPair][]int{ + // denotes existingPods[0,1,2] + {key: "zone", value: "zone1"}: []int{0, 1, 2}, + // denotes existingPods[3,4] + {key: "zone", value: "zone2"}: []int{3, 4}, + }, + want: &topologyPairsPodSpreadMap{ + minMatches: []int32{2}, + topologyPairsMaps: &topologyPairsMaps{ + podToTopologyPairs: map[string]topologyPairSet{ + "p-a1_": newPairSet("zone", "zone1"), + "p-a2_": newPairSet("zone", "zone1"), + "p-b1_": newPairSet("zone", "zone1"), + "p-y1_": newPairSet("zone", "zone2"), + "p-y2_": newPairSet("zone", "zone2"), + }, + }, + }, + }, + { + name: "namespace mis-match doesn't count", + pod: makePod().name("p").label("foo", "").spreadConstraint( + 1, "zone", hardSpread, makeLabelSelector().exists("foo").obj(), + ).obj(), + nodes: []*v1.Node{ + makeNode().name("node-a").label("zone", "zone1").label("node", "node-a").obj(), + makeNode().name("node-b").label("zone", "zone1").label("node", "node-b").obj(), + makeNode().name("node-x").label("zone", "zone2").label("node", "node-x").obj(), + makeNode().name("node-y").label("zone", "zone2").label("node", "node-y").obj(), + }, + existingPods: []*v1.Pod{ + makePod().name("p-a1").node("node-a").label("foo", "").obj(), + makePod().name("p-a2").namespace("ns1").node("node-a").label("foo", "").obj(), + makePod().name("p-b1").node("node-b").label("foo", "").obj(), + makePod().name("p-y1").namespace("ns2").node("node-y").label("foo", "").obj(), + makePod().name("p-y2").node("node-y").label("foo", "").obj(), + }, + injectPodPointers: map[topologyPair][]int{ + {key: "zone", value: "zone1"}: []int{0, 2}, + {key: "zone", value: "zone2"}: []int{4}, + }, + want: &topologyPairsPodSpreadMap{ + minMatches: []int32{1}, + topologyPairsMaps: &topologyPairsMaps{ + podToTopologyPairs: map[string]topologyPairSet{ + "p-a1_": newPairSet("zone", "zone1"), + "p-b1_": newPairSet("zone", "zone1"), + "p-y2_": newPairSet("zone", "zone2"), + }, + }, + }, + }, + { + name: "normal case with two spreadConstraints", + pod: makePod().name("p").label("foo", ""). + spreadConstraint(1, "zone", hardSpread, makeLabelSelector().exists("foo").obj()). + spreadConstraint(1, "node", hardSpread, makeLabelSelector().exists("foo").obj()). + obj(), + nodes: []*v1.Node{ + makeNode().name("node-a").label("zone", "zone1").label("node", "node-a").obj(), + makeNode().name("node-b").label("zone", "zone1").label("node", "node-b").obj(), + makeNode().name("node-x").label("zone", "zone2").label("node", "node-x").obj(), + makeNode().name("node-y").label("zone", "zone2").label("node", "node-y").obj(), + }, + existingPods: []*v1.Pod{ + makePod().name("p-a1").node("node-a").label("foo", "").obj(), + makePod().name("p-a2").node("node-a").label("foo", "").obj(), + makePod().name("p-b1").node("node-b").label("foo", "").obj(), + makePod().name("p-y1").node("node-y").label("foo", "").obj(), + makePod().name("p-y2").node("node-y").label("foo", "").obj(), + makePod().name("p-y3").node("node-y").label("foo", "").obj(), + makePod().name("p-y4").node("node-y").label("foo", "").obj(), + }, + injectPodPointers: map[topologyPair][]int{ + {key: "zone", value: "zone1"}: []int{0, 1, 2}, + {key: "zone", value: "zone2"}: []int{3, 4, 5, 6}, + {key: "node", value: "node-a"}: []int{0, 1}, + {key: "node", value: "node-b"}: []int{2}, + {key: "node", value: "node-x"}: []int{}, + {key: "node", value: "node-y"}: []int{3, 4, 5, 6}, + }, + want: &topologyPairsPodSpreadMap{ + minMatches: []int32{3, 0}, + topologyPairsMaps: &topologyPairsMaps{ + podToTopologyPairs: map[string]topologyPairSet{ + "p-a1_": newPairSet("zone", "zone1", "node", "node-a"), + "p-a2_": newPairSet("zone", "zone1", "node", "node-a"), + "p-b1_": newPairSet("zone", "zone1", "node", "node-b"), + "p-y1_": newPairSet("zone", "zone2", "node", "node-y"), + "p-y2_": newPairSet("zone", "zone2", "node", "node-y"), + "p-y3_": newPairSet("zone", "zone2", "node", "node-y"), + "p-y4_": newPairSet("zone", "zone2", "node", "node-y"), + }, + }, + }, + }, + { + name: "soft spreadConstraints should be bypassed", + pod: makePod().name("p").label("foo", ""). + spreadConstraint(1, "zone", softSpread, makeLabelSelector().exists("foo").obj()). + spreadConstraint(1, "zone", hardSpread, makeLabelSelector().exists("foo").obj()). + spreadConstraint(1, "zone", softSpread, makeLabelSelector().exists("foo").obj()). + spreadConstraint(1, "node", hardSpread, makeLabelSelector().exists("foo").obj()). + obj(), + nodes: []*v1.Node{ + makeNode().name("node-a").label("zone", "zone1").label("node", "node-a").obj(), + makeNode().name("node-b").label("zone", "zone1").label("node", "node-b").obj(), + makeNode().name("node-y").label("zone", "zone2").label("node", "node-y").obj(), + }, + existingPods: []*v1.Pod{ + makePod().name("p-a1").node("node-a").label("foo", "").obj(), + makePod().name("p-a2").node("node-a").label("foo", "").obj(), + makePod().name("p-b1").node("node-b").label("foo", "").obj(), + makePod().name("p-y1").node("node-y").label("foo", "").obj(), + makePod().name("p-y2").node("node-y").label("foo", "").obj(), + makePod().name("p-y3").node("node-y").label("foo", "").obj(), + makePod().name("p-y4").node("node-y").label("foo", "").obj(), + }, + injectPodPointers: map[topologyPair][]int{ + {key: "zone", value: "zone1"}: []int{0, 1, 2}, + {key: "zone", value: "zone2"}: []int{3, 4, 5, 6}, + {key: "node", value: "node-a"}: []int{0, 1}, + {key: "node", value: "node-b"}: []int{2}, + {key: "node", value: "node-y"}: []int{3, 4, 5, 6}, + }, + want: &topologyPairsPodSpreadMap{ + minMatches: []int32{3, 1}, + topologyPairsMaps: &topologyPairsMaps{ + podToTopologyPairs: map[string]topologyPairSet{ + "p-a1_": newPairSet("zone", "zone1", "node", "node-a"), + "p-a2_": newPairSet("zone", "zone1", "node", "node-a"), + "p-b1_": newPairSet("zone", "zone1", "node", "node-b"), + "p-y1_": newPairSet("zone", "zone2", "node", "node-y"), + "p-y2_": newPairSet("zone", "zone2", "node", "node-y"), + "p-y3_": newPairSet("zone", "zone2", "node", "node-y"), + "p-y4_": newPairSet("zone", "zone2", "node", "node-y"), + }, + }, + }, + }, + { + name: "different labelSelectors", + pod: makePod().name("p").label("foo", "").label("bar", ""). + spreadConstraint(1, "zone", hardSpread, makeLabelSelector().exists("foo").obj()). + spreadConstraint(1, "node", hardSpread, makeLabelSelector().exists("bar").obj()). + obj(), + nodes: []*v1.Node{ + makeNode().name("node-a").label("zone", "zone1").label("node", "node-a").obj(), + makeNode().name("node-b").label("zone", "zone1").label("node", "node-b").obj(), + makeNode().name("node-y").label("zone", "zone2").label("node", "node-y").obj(), + }, + existingPods: []*v1.Pod{ + makePod().name("p-a1").node("node-a").label("foo", "").obj(), + makePod().name("p-a2").node("node-a").label("foo", "").label("bar", "").obj(), + makePod().name("p-b1").node("node-b").label("foo", "").obj(), + makePod().name("p-y1").node("node-y").label("foo", "").obj(), + makePod().name("p-y2").node("node-y").label("foo", "").label("bar", "").obj(), + makePod().name("p-y3").node("node-y").label("foo", "").obj(), + makePod().name("p-y4").node("node-y").label("foo", "").label("bar", "").obj(), + }, + injectPodPointers: map[topologyPair][]int{ + {key: "zone", value: "zone1"}: []int{1}, + {key: "zone", value: "zone2"}: []int{4, 6}, + {key: "node", value: "node-a"}: []int{1}, + {key: "node", value: "node-b"}: []int{}, + {key: "node", value: "node-y"}: []int{4, 6}, + }, + want: &topologyPairsPodSpreadMap{ + minMatches: []int32{1, 0}, + topologyPairsMaps: &topologyPairsMaps{ + podToTopologyPairs: map[string]topologyPairSet{ + "p-a2_": newPairSet("zone", "zone1", "node", "node-a"), + "p-y2_": newPairSet("zone", "zone2", "node", "node-y"), + "p-y4_": newPairSet("zone", "zone2", "node", "node-y"), + }, + }, + }, + }, + { + name: "two spreadConstraints, and with podAffinity", + pod: makePod().name("p").label("foo", ""). + nodeAffinityIn("node", []string{"node-a", "node-b", "node-y"}). // exclude node-x + spreadConstraint(1, "zone", hardSpread, makeLabelSelector().exists("foo").obj()). + spreadConstraint(1, "node", hardSpread, makeLabelSelector().exists("foo").obj()). + obj(), + nodes: []*v1.Node{ + makeNode().name("node-a").label("zone", "zone1").label("node", "node-a").obj(), + makeNode().name("node-b").label("zone", "zone1").label("node", "node-b").obj(), + makeNode().name("node-x").label("zone", "zone2").label("node", "node-x").obj(), + makeNode().name("node-y").label("zone", "zone2").label("node", "node-y").obj(), + }, + existingPods: []*v1.Pod{ + makePod().name("p-a1").node("node-a").label("foo", "").obj(), + makePod().name("p-a2").node("node-a").label("foo", "").obj(), + makePod().name("p-b1").node("node-b").label("foo", "").obj(), + makePod().name("p-y1").node("node-y").label("foo", "").obj(), + makePod().name("p-y2").node("node-y").label("foo", "").obj(), + makePod().name("p-y3").node("node-y").label("foo", "").obj(), + makePod().name("p-y4").node("node-y").label("foo", "").obj(), + }, + injectPodPointers: map[topologyPair][]int{ + {key: "zone", value: "zone1"}: []int{0, 1, 2}, + {key: "zone", value: "zone2"}: []int{3, 4, 5, 6}, + {key: "node", value: "node-a"}: []int{0, 1}, + {key: "node", value: "node-b"}: []int{2}, + {key: "node", value: "node-y"}: []int{3, 4, 5, 6}, + }, + want: &topologyPairsPodSpreadMap{ + minMatches: []int32{3, 1}, + topologyPairsMaps: &topologyPairsMaps{ + podToTopologyPairs: map[string]topologyPairSet{ + "p-a1_": newPairSet("zone", "zone1", "node", "node-a"), + "p-a2_": newPairSet("zone", "zone1", "node", "node-a"), + "p-b1_": newPairSet("zone", "zone1", "node", "node-b"), + "p-y1_": newPairSet("zone", "zone2", "node", "node-y"), + "p-y2_": newPairSet("zone", "zone2", "node", "node-y"), + "p-y3_": newPairSet("zone", "zone2", "node", "node-y"), + "p-y4_": newPairSet("zone", "zone2", "node", "node-y"), + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.want.topologyPairToPods = make(map[topologyPair]podSet) + for pair, indexes := range tt.injectPodPointers { + pSet := make(podSet) + for _, i := range indexes { + pSet[tt.existingPods[i]] = struct{}{} + } + tt.want.topologyPairToPods[pair] = pSet + } + nodeInfoMap := schedulernodeinfo.CreateNodeNameToInfoMap(tt.existingPods, tt.nodes) + if got, _ := getTPMapMatchingSpreadConstraints(tt.pod, nodeInfoMap); !reflect.DeepEqual(got, tt.want) { + t.Errorf("getTPMapMatchingSpreadConstraints() = %v, want %v", got, tt.want) + } + }) + } +} + +var ( + hardSpread = v1.DoNotSchedule + softSpread = v1.ScheduleAnyway +) + +func newPairSet(kv ...string) topologyPairSet { + result := make(topologyPairSet) + for i := 0; i < len(kv); i += 2 { + pair := topologyPair{key: kv[i], value: kv[i+1]} + result[pair] = struct{}{} + } + return result +} diff --git a/pkg/scheduler/algorithm/predicates/utils.go b/pkg/scheduler/algorithm/predicates/utils.go index dc833b2d6fb..2e7a2281fea 100644 --- a/pkg/scheduler/algorithm/predicates/utils.go +++ b/pkg/scheduler/algorithm/predicates/utils.go @@ -19,8 +19,9 @@ package predicates import ( "strings" - v1 "k8s.io/api/core/v1" + "k8s.io/api/core/v1" storagev1beta1 "k8s.io/api/storage/v1beta1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/sets" utilfeature "k8s.io/apiserver/pkg/util/feature" @@ -147,3 +148,169 @@ func isCSIMigrationOn(csiNode *storagev1beta1.CSINode, pluginName string) bool { return mpaSet.Has(pluginName) } + +// utilities for building pod/node objects using a "chained" manner +type nodeSelectorWrapper struct{ v1.NodeSelector } + +func makeNodeSelector() *nodeSelectorWrapper { + return &nodeSelectorWrapper{v1.NodeSelector{}} +} + +// NOTE: each time we append a selectorTerm into `s` +// and overall all selecterTerms are ORed +func (s *nodeSelectorWrapper) in(key string, vals []string) *nodeSelectorWrapper { + expression := v1.NodeSelectorRequirement{ + Key: key, + Operator: v1.NodeSelectorOpIn, + Values: vals, + } + selectorTerm := v1.NodeSelectorTerm{} + selectorTerm.MatchExpressions = append(selectorTerm.MatchExpressions, expression) + s.NodeSelectorTerms = append(s.NodeSelectorTerms, selectorTerm) + return s +} + +func (s *nodeSelectorWrapper) obj() *v1.NodeSelector { + return &s.NodeSelector +} + +type labelSelectorWrapper struct{ metav1.LabelSelector } + +func makeLabelSelector() *labelSelectorWrapper { + return &labelSelectorWrapper{metav1.LabelSelector{}} +} + +func (s *labelSelectorWrapper) label(k, v string) *labelSelectorWrapper { + if s.MatchLabels == nil { + s.MatchLabels = make(map[string]string) + } + s.MatchLabels[k] = v + return s +} + +func (s *labelSelectorWrapper) in(key string, vals []string) *labelSelectorWrapper { + expression := metav1.LabelSelectorRequirement{ + Key: key, + Operator: metav1.LabelSelectorOpIn, + Values: vals, + } + s.MatchExpressions = append(s.MatchExpressions, expression) + return s +} + +func (s *labelSelectorWrapper) notIn(key string, vals []string) *labelSelectorWrapper { + expression := metav1.LabelSelectorRequirement{ + Key: key, + Operator: metav1.LabelSelectorOpNotIn, + Values: vals, + } + s.MatchExpressions = append(s.MatchExpressions, expression) + return s +} + +func (s *labelSelectorWrapper) exists(k string) *labelSelectorWrapper { + expression := metav1.LabelSelectorRequirement{ + Key: k, + Operator: metav1.LabelSelectorOpExists, + } + s.MatchExpressions = append(s.MatchExpressions, expression) + return s +} + +func (s *labelSelectorWrapper) notExist(k string) *labelSelectorWrapper { + expression := metav1.LabelSelectorRequirement{ + Key: k, + Operator: metav1.LabelSelectorOpDoesNotExist, + } + s.MatchExpressions = append(s.MatchExpressions, expression) + return s +} + +func (s *labelSelectorWrapper) obj() *metav1.LabelSelector { + return &s.LabelSelector +} + +type podWrapper struct{ v1.Pod } + +func makePod() *podWrapper { + return &podWrapper{v1.Pod{}} +} + +func (p *podWrapper) obj() *v1.Pod { + return &p.Pod +} + +func (p *podWrapper) name(s string) *podWrapper { + p.Name = s + return p +} + +func (p *podWrapper) namespace(s string) *podWrapper { + p.Namespace = s + return p +} + +func (p *podWrapper) node(s string) *podWrapper { + p.Spec.NodeName = s + return p +} + +func (p *podWrapper) nodeSelector(m map[string]string) *podWrapper { + p.Spec.NodeSelector = m + return p +} + +// particular represents HARD node affinity +func (p *podWrapper) nodeAffinityIn(key string, vals []string) *podWrapper { + if p.Spec.Affinity == nil { + p.Spec.Affinity = &v1.Affinity{} + } + if p.Spec.Affinity.NodeAffinity == nil { + p.Spec.Affinity.NodeAffinity = &v1.NodeAffinity{} + } + nodeSelector := makeNodeSelector().in(key, vals).obj() + p.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = nodeSelector + return p +} + +func (p *podWrapper) spreadConstraint(maxSkew int, tpKey string, mode v1.UnsatisfiableConstraintResponse, selector *metav1.LabelSelector) *podWrapper { + c := v1.TopologySpreadConstraint{ + MaxSkew: int32(maxSkew), + TopologyKey: tpKey, + WhenUnsatisfiable: mode, + LabelSelector: selector, + } + p.Spec.TopologySpreadConstraints = append(p.Spec.TopologySpreadConstraints, c) + return p +} + +func (p *podWrapper) label(k, v string) *podWrapper { + if p.Labels == nil { + p.Labels = make(map[string]string) + } + p.Labels[k] = v + return p +} + +type nodeWrapper struct{ v1.Node } + +func makeNode() *nodeWrapper { + return &nodeWrapper{v1.Node{}} +} + +func (n *nodeWrapper) obj() *v1.Node { + return &n.Node +} + +func (n *nodeWrapper) name(s string) *nodeWrapper { + n.Name = s + return n +} + +func (n *nodeWrapper) label(k, v string) *nodeWrapper { + if n.Labels == nil { + n.Labels = make(map[string]string) + } + n.Labels[k] = v + return n +}