diff --git a/test/integration/volumescheduling/volume_binding_test.go b/test/integration/volumescheduling/volume_binding_test.go
index 35bf07bb79a..783c4144200 100644
--- a/test/integration/volumescheduling/volume_binding_test.go
+++ b/test/integration/volumescheduling/volume_binding_test.go
@@ -1024,7 +1024,7 @@ func TestRescheduleProvisioning(t *testing.T) {
 	}
 
 	// Prepare node and storage class.
-	testNode := makeNode(0)
+	testNode := makeNode(1)
 	if _, err := clientset.CoreV1().Nodes().Create(context.TODO(), testNode, metav1.CreateOptions{}); err != nil {
 		t.Fatalf("Failed to create Node %q: %v", testNode.Name, err)
 	}
@@ -1078,7 +1078,7 @@ func setupCluster(t *testing.T, nsName string, numberOfNodes int, resyncPeriod t
 	// Create shared objects
 	// Create nodes
 	for i := 0; i < numberOfNodes; i++ {
-		testNode := makeNode(i)
+		testNode := makeNode(i + 1)
 		if _, err := clientset.CoreV1().Nodes().Create(context.TODO(), testNode, metav1.CreateOptions{}); err != nil {
 			t.Fatalf("Failed to create Node %q: %v", testNode.Name, err)
 		}
@@ -1199,21 +1199,6 @@ func makePV(name, scName, pvcName, ns, node string) *v1.PersistentVolume {
 					Path: "/test-path",
 				},
 			},
-			NodeAffinity: &v1.VolumeNodeAffinity{
-				Required: &v1.NodeSelector{
-					NodeSelectorTerms: []v1.NodeSelectorTerm{
-						{
-							MatchExpressions: []v1.NodeSelectorRequirement{
-								{
-									Key:      nodeAffinityLabelKey,
-									Operator: v1.NodeSelectorOpIn,
-									Values:   []string{node},
-								},
-							},
-						},
-					},
-				},
-			},
 		},
 	}
 
@@ -1221,6 +1206,24 @@ func makePV(name, scName, pvcName, ns, node string) *v1.PersistentVolume {
 		pv.Spec.ClaimRef = &v1.ObjectReference{Name: pvcName, Namespace: ns}
 	}
 
+	if node != "" {
+		pv.Spec.NodeAffinity = &v1.VolumeNodeAffinity{
+			Required: &v1.NodeSelector{
+				NodeSelectorTerms: []v1.NodeSelectorTerm{
+					{
+						MatchExpressions: []v1.NodeSelectorRequirement{
+							{
+								Key:      nodeAffinityLabelKey,
+								Operator: v1.NodeSelectorOpIn,
+								Values:   []string{node},
+							},
+						},
+					},
+				},
+			},
+		}
+	}
+
 	return pv
 }
 
@@ -1280,11 +1283,13 @@ func makePod(name, ns string, pvcs []string) *v1.Pod {
 	}
 }
 
+// makeNode creates a node with the name "node-<index>"
 func makeNode(index int) *v1.Node {
+	name := fmt.Sprintf("node-%d", index)
 	return &v1.Node{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:   fmt.Sprintf("node-%d", index+1),
-			Labels: map[string]string{nodeAffinityLabelKey: fmt.Sprintf("node-%d", index+1)},
+			Name:   name,
+			Labels: map[string]string{nodeAffinityLabelKey: name},
 		},
 		Spec: v1.NodeSpec{Unschedulable: false},
 		Status: v1.NodeStatus{
diff --git a/test/integration/volumescheduling/volume_capacity_priority_test.go b/test/integration/volumescheduling/volume_capacity_priority_test.go
new file mode 100644
index 00000000000..75792988f9a
--- /dev/null
+++ b/test/integration/volumescheduling/volume_capacity_priority_test.go
@@ -0,0 +1,310 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package volumescheduling
+
+// This file tests the VolumeCapacityPriority feature.
+ +import ( + "context" + "testing" + "time" + + v1 "k8s.io/api/core/v1" + storagev1 "k8s.io/api/storage/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + utilfeature "k8s.io/apiserver/pkg/util/feature" + featuregatetesting "k8s.io/component-base/featuregate/testing" + "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/features" +) + +var ( + waitSSDSC = makeStorageClass("ssd", &modeWait) + waitHDDSC = makeStorageClass("hdd", &modeWait) +) + +func mergeNodeLabels(node *v1.Node, labels map[string]string) *v1.Node { + for k, v := range labels { + node.Labels[k] = v + } + return node +} + +func setupClusterForVolumeCapacityPriority(t *testing.T, nsName string, resyncPeriod time.Duration, provisionDelaySeconds int) *testConfig { + textCtx := initTestSchedulerWithOptions(t, initTestMaster(t, nsName, nil), resyncPeriod) + clientset := textCtx.clientSet + ns := textCtx.ns.Name + + ctrl, informerFactory, err := initPVController(t, textCtx, provisionDelaySeconds) + if err != nil { + t.Fatalf("Failed to create PV controller: %v", err) + } + go ctrl.Run(textCtx.ctx.Done()) + + // Start informer factory after all controllers are configured and running. + informerFactory.Start(textCtx.ctx.Done()) + informerFactory.WaitForCacheSync(textCtx.ctx.Done()) + + return &testConfig{ + client: clientset, + ns: ns, + stop: textCtx.ctx.Done(), + teardown: func() { + klog.Infof("test cluster %q start to tear down", ns) + deleteTestObjects(clientset, ns, metav1.DeleteOptions{}) + cleanupTest(t, textCtx) + }, + } +} + +func TestVolumeCapacityPriority(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.VolumeCapacityPriority, true)() + + config := setupClusterForVolumeCapacityPriority(t, "volume-capacity-priority", 0, 0) + defer config.teardown() + + tests := []struct { + name string + pod *v1.Pod + nodes []*v1.Node + pvs []*v1.PersistentVolume + pvcs []*v1.PersistentVolumeClaim + wantNodeName string + }{ + { + name: "local volumes with close capacity are preferred", + pod: makePod("pod", config.ns, []string{"data"}), + nodes: []*v1.Node{ + makeNode(0), + makeNode(1), + makeNode(2), + }, + pvs: []*v1.PersistentVolume{ + setPVNodeAffinity(setPVCapacity(makePV("pv-0", waitSSDSC.Name, "", config.ns, "node-0"), resource.MustParse("200Gi")), map[string][]string{v1.LabelHostname: {"node-0"}}), + setPVNodeAffinity(setPVCapacity(makePV("pv-1", waitSSDSC.Name, "", config.ns, "node-0"), resource.MustParse("200Gi")), map[string][]string{v1.LabelHostname: {"node-0"}}), + setPVNodeAffinity(setPVCapacity(makePV("pv-2", waitSSDSC.Name, "", config.ns, "node-1"), resource.MustParse("100Gi")), map[string][]string{v1.LabelHostname: {"node-1"}}), + setPVNodeAffinity(setPVCapacity(makePV("pv-3", waitSSDSC.Name, "", config.ns, "node-1"), resource.MustParse("100Gi")), map[string][]string{v1.LabelHostname: {"node-1"}}), + setPVNodeAffinity(setPVCapacity(makePV("pv-4", waitSSDSC.Name, "", config.ns, "node-2"), resource.MustParse("100Gi")), map[string][]string{v1.LabelHostname: {"node-2"}}), + setPVNodeAffinity(setPVCapacity(makePV("pv-5", waitSSDSC.Name, "", config.ns, "node-2"), resource.MustParse("50Gi")), map[string][]string{v1.LabelHostname: {"node-2"}}), + }, + pvcs: []*v1.PersistentVolumeClaim{ + setPVCRequestStorage(makePVC("data", config.ns, &waitSSDSC.Name, ""), resource.MustParse("20Gi")), + }, + wantNodeName: "node-2", + }, + { + name: "local volumes with close capacity are preferred (multiple pvcs)", + pod: makePod("pod", config.ns, 
+				[]string{"data-0", "data-1"}),
+			nodes: []*v1.Node{
+				makeNode(0),
+				makeNode(1),
+				makeNode(2),
+			},
+			pvs: []*v1.PersistentVolume{
+				setPVNodeAffinity(setPVCapacity(makePV("pv-0", waitSSDSC.Name, "", config.ns, "node-0"), resource.MustParse("200Gi")), map[string][]string{v1.LabelHostname: {"node-0"}}),
+				setPVNodeAffinity(setPVCapacity(makePV("pv-1", waitSSDSC.Name, "", config.ns, "node-0"), resource.MustParse("200Gi")), map[string][]string{v1.LabelHostname: {"node-0"}}),
+				setPVNodeAffinity(setPVCapacity(makePV("pv-2", waitSSDSC.Name, "", config.ns, "node-1"), resource.MustParse("100Gi")), map[string][]string{v1.LabelHostname: {"node-1"}}),
+				setPVNodeAffinity(setPVCapacity(makePV("pv-3", waitSSDSC.Name, "", config.ns, "node-1"), resource.MustParse("100Gi")), map[string][]string{v1.LabelHostname: {"node-1"}}),
+				setPVNodeAffinity(setPVCapacity(makePV("pv-4", waitSSDSC.Name, "", config.ns, "node-2"), resource.MustParse("100Gi")), map[string][]string{v1.LabelHostname: {"node-2"}}),
+				setPVNodeAffinity(setPVCapacity(makePV("pv-5", waitSSDSC.Name, "", config.ns, "node-2"), resource.MustParse("50Gi")), map[string][]string{v1.LabelHostname: {"node-2"}}),
+			},
+			pvcs: []*v1.PersistentVolumeClaim{
+				setPVCRequestStorage(makePVC("data-0", config.ns, &waitSSDSC.Name, ""), resource.MustParse("80Gi")),
+				setPVCRequestStorage(makePVC("data-1", config.ns, &waitSSDSC.Name, ""), resource.MustParse("80Gi")),
+			},
+			wantNodeName: "node-1",
+		},
+		{
+			name: "local volumes with close capacity are preferred (multiple pvcs, multiple classes)",
+			pod:  makePod("pod", config.ns, []string{"data-0", "data-1"}),
+			nodes: []*v1.Node{
+				makeNode(0),
+				makeNode(1),
+				makeNode(2),
+			},
+			pvs: []*v1.PersistentVolume{
+				setPVNodeAffinity(setPVCapacity(makePV("pv-0", waitSSDSC.Name, "", config.ns, "node-0"), resource.MustParse("200Gi")), map[string][]string{v1.LabelHostname: {"node-0"}}),
+				setPVNodeAffinity(setPVCapacity(makePV("pv-1", waitHDDSC.Name, "", config.ns, "node-0"), resource.MustParse("200Gi")), map[string][]string{v1.LabelHostname: {"node-0"}}),
+				setPVNodeAffinity(setPVCapacity(makePV("pv-2", waitSSDSC.Name, "", config.ns, "node-1"), resource.MustParse("100Gi")), map[string][]string{v1.LabelHostname: {"node-1"}}),
+				setPVNodeAffinity(setPVCapacity(makePV("pv-3", waitHDDSC.Name, "", config.ns, "node-1"), resource.MustParse("100Gi")), map[string][]string{v1.LabelHostname: {"node-1"}}),
+				setPVNodeAffinity(setPVCapacity(makePV("pv-4", waitSSDSC.Name, "", config.ns, "node-2"), resource.MustParse("100Gi")), map[string][]string{v1.LabelHostname: {"node-2"}}),
+				setPVNodeAffinity(setPVCapacity(makePV("pv-5", waitHDDSC.Name, "", config.ns, "node-2"), resource.MustParse("50Gi")), map[string][]string{v1.LabelHostname: {"node-2"}}),
+			},
+			pvcs: []*v1.PersistentVolumeClaim{
+				setPVCRequestStorage(makePVC("data-0", config.ns, &waitSSDSC.Name, ""), resource.MustParse("80Gi")),
+				setPVCRequestStorage(makePVC("data-1", config.ns, &waitHDDSC.Name, ""), resource.MustParse("80Gi")),
+			},
+			wantNodeName: "node-1",
+		},
+		{
+			name: "zonal volumes with close capacity are preferred (multiple pvcs, multiple classes)",
+			pod:  makePod("pod", config.ns, []string{"data-0", "data-1"}),
+			nodes: []*v1.Node{
+				mergeNodeLabels(makeNode(0), map[string]string{
+					"topology.kubernetes.io/region": "region-a",
+					"topology.kubernetes.io/zone":   "zone-a",
+				}),
+				mergeNodeLabels(makeNode(1), map[string]string{
+					"topology.kubernetes.io/region": "region-b",
+					"topology.kubernetes.io/zone":   "zone-b",
+				}),
+				mergeNodeLabels(makeNode(2),
+					map[string]string{
+					"topology.kubernetes.io/region": "region-c",
+					"topology.kubernetes.io/zone":   "zone-c",
+				}),
+			},
+			pvs: []*v1.PersistentVolume{
+				setPVNodeAffinity(setPVCapacity(makePV("pv-0", waitSSDSC.Name, "", config.ns, ""), resource.MustParse("200Gi")), map[string][]string{
+					"topology.kubernetes.io/region": {"region-a"},
+					"topology.kubernetes.io/zone":   {"zone-a"},
+				}),
+				setPVNodeAffinity(setPVCapacity(makePV("pv-1", waitHDDSC.Name, "", config.ns, ""), resource.MustParse("200Gi")), map[string][]string{
+					"topology.kubernetes.io/region": {"region-a"},
+					"topology.kubernetes.io/zone":   {"zone-a"},
+				}),
+				setPVNodeAffinity(setPVCapacity(makePV("pv-2", waitSSDSC.Name, "", config.ns, ""), resource.MustParse("100Gi")), map[string][]string{
+					"topology.kubernetes.io/region": {"region-b"},
+					"topology.kubernetes.io/zone":   {"zone-b"},
+				}),
+				setPVNodeAffinity(setPVCapacity(makePV("pv-3", waitHDDSC.Name, "", config.ns, ""), resource.MustParse("100Gi")), map[string][]string{
+					"topology.kubernetes.io/region": {"region-b"},
+					"topology.kubernetes.io/zone":   {"zone-b"},
+				}),
+				setPVNodeAffinity(setPVCapacity(makePV("pv-4", waitSSDSC.Name, "", config.ns, ""), resource.MustParse("100Gi")), map[string][]string{
+					"topology.kubernetes.io/region": {"region-c"},
+					"topology.kubernetes.io/zone":   {"zone-c"},
+				}),
+				setPVNodeAffinity(setPVCapacity(makePV("pv-5", waitHDDSC.Name, "", config.ns, ""), resource.MustParse("50Gi")), map[string][]string{
+					"topology.kubernetes.io/region": {"region-c"},
+					"topology.kubernetes.io/zone":   {"zone-c"},
+				}),
+			},
+			pvcs: []*v1.PersistentVolumeClaim{
+				setPVCRequestStorage(makePVC("data-0", config.ns, &waitSSDSC.Name, ""), resource.MustParse("80Gi")),
+				setPVCRequestStorage(makePVC("data-1", config.ns, &waitHDDSC.Name, ""), resource.MustParse("80Gi")),
+			},
+			wantNodeName: "node-1",
+		},
+	}
+
+	c := config.client
+
+	t.Log("Creating StorageClasses")
+	classes := map[string]*storagev1.StorageClass{}
+	classes[waitSSDSC.Name] = waitSSDSC
+	classes[waitHDDSC.Name] = waitHDDSC
+	for _, sc := range classes {
+		if _, err := c.StorageV1().StorageClasses().Create(context.TODO(), sc, metav1.CreateOptions{}); err != nil {
+			t.Fatalf("failed to create StorageClass %q: %v", sc.Name, err)
+		}
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Log("Creating Nodes")
+			for _, node := range tt.nodes {
+				if _, err := c.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}); err != nil {
+					t.Fatalf("failed to create Node %q: %v", node.Name, err)
+				}
+			}
+
+			t.Log("Creating PVs")
+			for _, pv := range tt.pvs {
+				if _, err := c.CoreV1().PersistentVolumes().Create(context.TODO(), pv, metav1.CreateOptions{}); err != nil {
+					t.Fatalf("failed to create PersistentVolume %q: %v", pv.Name, err)
+				}
+			}
+
+			// https://github.com/kubernetes/kubernetes/issues/85320
+			t.Log("Waiting for PVs to become available to avoid race condition in PV controller")
+			for _, pv := range tt.pvs {
+				if err := waitForPVPhase(c, pv.Name, v1.VolumeAvailable); err != nil {
+					t.Fatalf("failed to wait for PersistentVolume %q to become available: %v", pv.Name, err)
+				}
+			}
+
+			t.Log("Creating PVCs")
+			for _, pvc := range tt.pvcs {
+				if _, err := c.CoreV1().PersistentVolumeClaims(config.ns).Create(context.TODO(), pvc, metav1.CreateOptions{}); err != nil {
+					t.Fatalf("failed to create PersistentVolumeClaim %q: %v", pvc.Name, err)
+				}
+			}
+
+			t.Log("Create Pod")
+			if _, err := c.CoreV1().Pods(config.ns).Create(context.TODO(), tt.pod, metav1.CreateOptions{}); err != nil {
+				t.Fatalf("failed to create Pod %q: %v", tt.pod.Name, err)
+			}
+			if err := waitForPodToSchedule(c, tt.pod); err != nil {
+				t.Errorf("failed to schedule Pod %q: %v", tt.pod.Name, err)
+			}
+
+			t.Log("Verify the assigned node")
+			pod, err := c.CoreV1().Pods(config.ns).Get(context.TODO(), tt.pod.Name, metav1.GetOptions{})
+			if err != nil {
+				t.Fatalf("failed to get Pod %q: %v", tt.pod.Name, err)
+			}
+			if pod.Spec.NodeName != tt.wantNodeName {
+				t.Errorf("pod %s assigned node expects %q, got %q", pod.Name, tt.wantNodeName, pod.Spec.NodeName)
+			}
+
+			t.Log("Cleanup test objects")
+			c.CoreV1().Nodes().DeleteCollection(context.TODO(), deleteOption, metav1.ListOptions{})
+			c.CoreV1().Pods(config.ns).DeleteCollection(context.TODO(), deleteOption, metav1.ListOptions{})
+			c.CoreV1().PersistentVolumeClaims(config.ns).DeleteCollection(context.TODO(), deleteOption, metav1.ListOptions{})
+			c.CoreV1().PersistentVolumes().DeleteCollection(context.TODO(), deleteOption, metav1.ListOptions{})
+		})
+	}
+}
+
+func setPVNodeAffinity(pv *v1.PersistentVolume, keyValues map[string][]string) *v1.PersistentVolume {
+	matchExpressions := make([]v1.NodeSelectorRequirement, 0)
+	for key, values := range keyValues {
+		matchExpressions = append(matchExpressions, v1.NodeSelectorRequirement{
+			Key:      key,
+			Operator: v1.NodeSelectorOpIn,
+			Values:   values,
+		})
+	}
+	pv.Spec.NodeAffinity = &v1.VolumeNodeAffinity{
+		Required: &v1.NodeSelector{
+			NodeSelectorTerms: []v1.NodeSelectorTerm{
+				{
+					MatchExpressions: matchExpressions,
+				},
+			},
+		},
+	}
+	return pv
+}
+
+func setPVCapacity(pv *v1.PersistentVolume, capacity resource.Quantity) *v1.PersistentVolume {
+	if pv.Spec.Capacity == nil {
+		pv.Spec.Capacity = make(v1.ResourceList)
+	}
+	pv.Spec.Capacity[v1.ResourceName(v1.ResourceStorage)] = capacity
+	return pv
+}
+
+func setPVCRequestStorage(pvc *v1.PersistentVolumeClaim, request resource.Quantity) *v1.PersistentVolumeClaim {
+	pvc.Spec.Resources = v1.ResourceRequirements{
+		Requests: v1.ResourceList{
+			v1.ResourceName(v1.ResourceStorage): request,
+		},
+	}
+	return pvc
+}
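
For reference, the test fixtures above are produced by chaining the helpers added in volume_capacity_priority_test.go: makePV builds a base local PV, setPVCapacity fills in spec.capacity[storage], and setPVNodeAffinity pins the PV to a topology domain; with VolumeCapacityPriority enabled, each case expects the node whose PVs most closely fit the requested sizes. Below is a minimal, self-contained sketch of the PV shape this composition yields. It is not part of the patch; the function and object names (examplePV, "pv-example") and the "ssd" class are illustrative assumptions only.

package volumescheduling

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// examplePV mirrors what makePV + setPVCapacity + setPVNodeAffinity produce:
// a local PV with an explicit storage capacity and a required node-affinity
// term, here keyed on the hostname label.
func examplePV() *v1.PersistentVolume {
	return &v1.PersistentVolume{
		ObjectMeta: metav1.ObjectMeta{Name: "pv-example"},
		Spec: v1.PersistentVolumeSpec{
			StorageClassName: "ssd",
			AccessModes:      []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce},
			Capacity: v1.ResourceList{
				// The capacity the VolumeCapacityPriority scoring compares
				// against the claim's requested storage.
				v1.ResourceStorage: resource.MustParse("100Gi"),
			},
			PersistentVolumeSource: v1.PersistentVolumeSource{
				Local: &v1.LocalVolumeSource{Path: "/test-path"},
			},
			NodeAffinity: &v1.VolumeNodeAffinity{
				Required: &v1.NodeSelector{
					NodeSelectorTerms: []v1.NodeSelectorTerm{{
						MatchExpressions: []v1.NodeSelectorRequirement{{
							Key:      v1.LabelHostname,
							Operator: v1.NodeSelectorOpIn,
							Values:   []string{"node-0"},
						}},
					}},
				},
			},
		},
	}
}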