This commit is contained in:
moriya 2024-05-20 23:13:56 +09:00
parent 58143ff3eb
commit 1b8fb3a838
2 changed files with 3 additions and 506 deletions

View File

@ -20,8 +20,6 @@ import (
"context"
"errors"
"fmt"
"reflect"
"sort"
v1 "k8s.io/api/core/v1"
storage "k8s.io/api/storage/v1"
@ -246,7 +244,7 @@ func (pl *VolumeZone) EventsToRegister() []framework.ClusterEventWithHint {
return []framework.ClusterEventWithHint{
// New storageClass with bind mode `VolumeBindingWaitForFirstConsumer` will make a pod schedulable.
// Due to immutable field `storageClass.volumeBindingMode`, storageClass update events are ignored.
{Event: framework.ClusterEvent{Resource: framework.StorageClass, ActionType: framework.Add}, QueueingHintFn: pl.isSchedulableAfterStorageClassAdded},
{Event: framework.ClusterEvent{Resource: framework.StorageClass, ActionType: framework.Add}},
// A new node or updating a node's volume zone labels may make a pod schedulable.
//
// A note about UpdateNodeTaint event:
@ -257,12 +255,12 @@ func (pl *VolumeZone) EventsToRegister() []framework.ClusterEventWithHint {
// As a workaround, we add UpdateNodeTaint event to catch the case.
// We can remove UpdateNodeTaint when we remove the preCheck feature.
// See: https://github.com/kubernetes/kubernetes/issues/110175
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeLabel | framework.UpdateNodeTaint}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeLabel | framework.UpdateNodeTaint}},
// A new pvc may make a pod schedulable.
// PVC's VolumeName is mutable if old PVC's volumeName is empty.
{Event: framework.ClusterEvent{Resource: framework.PersistentVolumeClaim, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterPersistentVolumeClaimChange},
// A new pv or updating a pv's volume zone labels may make a pod schedulable.
{Event: framework.ClusterEvent{Resource: framework.PersistentVolume, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterPersistentVolumeChange},
{Event: framework.ClusterEvent{Resource: framework.PersistentVolume, ActionType: framework.Add | framework.Update}},
}
}
@ -280,70 +278,6 @@ func (pl *VolumeZone) getPersistentVolumeClaimNameFromPod(pod *v1.Pod) []string
return pvcNames
}
// isSchedulableAfterStorageClassAdded is invoked whenever a StorageClass is added.
// It checks whether the addition of StorageClass has made a previously unschedulable pod schedulable.
// Only a new StorageClass with WaitForFirstConsumer will cause a pod to become schedulable.
func (pl *VolumeZone) isSchedulableAfterStorageClassAdded(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
_, addedStorageClass, err := util.As[*storage.StorageClass](nil, newObj)
if err != nil {
return framework.Queue, fmt.Errorf("unexpected objects in isSchedulableAfterStorageClassAdded: %w", err)
}
pvcNames := pl.getPersistentVolumeClaimNameFromPod(pod)
for _, pvcName := range pvcNames {
pvc, err := pl.pvcLister.PersistentVolumeClaims(pod.Namespace).Get(pvcName)
if err != nil {
logger.Error(err, "unexpected error during getting PVC", "Namespace", pod.Namespace, "pvc", pvcName)
continue
}
pvName := pvc.Spec.VolumeName
if pvName != "" {
// We can skip PersistentVolume because we only want to check storageClass
continue
}
scName := storagehelpers.GetPersistentVolumeClaimClass(pvc)
if scName == addedStorageClass.Name {
if (addedStorageClass.VolumeBindingMode != nil) && (*addedStorageClass.VolumeBindingMode == storage.VolumeBindingWaitForFirstConsumer) {
logger.V(5).Info("a new WaitForFirstConsumer storageClass for the pod's PVC is created, which might make the pod schedulable", "storageClass", klog.KObj(addedStorageClass), "pod", klog.KObj(pod))
return framework.Queue, nil
}
}
}
logger.V(5).Info("StorageClass was created but it doesn't make this pod schedulable", "pod", klog.KObj(pod), "StorageClass", klog.KObj(addedStorageClass))
return framework.QueueSkip, nil
}
// isSchedulableAfterNodeChange is invoked whenever a node added or updated.
// It checks whether the change of Node has made a previously unschedulable pod schedulable.
// If the node label related to pod's pv topology is changed, it could make the pod schedulable.
func (pl *VolumeZone) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
originalNode, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
if err != nil {
return framework.Queue, fmt.Errorf("unexpected objects in isSchedulableAfterNodeChange: %w", err)
}
podPVTopologies, status := pl.getPVbyPod(logger, pod)
if !status.IsSuccess() {
return framework.Queue, status.AsError()
}
_, isMatched := pl.isSchedulableNode(logger, podPVTopologies, modifiedNode, pod)
if !isMatched {
logger.V(5).Info("node was created or updated but it doesn't make this pod schedulable", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
return framework.QueueSkip, nil
}
wasMatched := false
if originalNode != nil {
_, wasMatched = pl.isSchedulableNode(logger, podPVTopologies, originalNode, pod)
}
if !wasMatched {
logger.V(5).Info("node was created or updated, which might make the pod schedulable", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
return framework.Queue, nil
}
logger.V(5).Info("node was created or updated but it doesn't make this pod schedulable", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
return framework.QueueSkip, nil
}
// isSchedulablerNode checks whether the node can schedule the pod.
func (pl *VolumeZone) isSchedulableNode(logger klog.Logger, podPVTopologies []pvTopology, node *v1.Node, pod *v1.Pod) (*framework.Status, bool) {
hasAnyNodeConstraint := false
@ -429,28 +363,6 @@ func (pl *VolumeZone) checkPVCBindingToPodPV(logger klog.Logger, pvc *v1.Persist
return false
}
// isSchedulableAfterPersistentVolumeChange is invoked whenever a PersistentVolume added or updated.
// It checks whether the change of PVC has made a previously unschedulable pod schedulable.
// Changing the PV topology labels could cause the pod to become schedulable.
func (pl *VolumeZone) isSchedulableAfterPersistentVolumeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
originalPV, modifiedPV, err := util.As[*v1.PersistentVolume](oldObj, newObj)
if err != nil {
return framework.Queue, fmt.Errorf("unexpected objects in isSchedulableAfterPersistentVolumeChange: %w", err)
}
originalPVTopologies := make([]pvTopology, 0)
if originalPV != nil {
originalPVTopologies, _ = pl.getPVTopologiesAndSort(logger, pod, originalPV)
}
modifiedPVTopologies, isPodBoundToPV := pl.getPVTopologiesAndSort(logger, pod, modifiedPV)
if isPodBoundToPV && !reflect.DeepEqual(originalPVTopologies, modifiedPVTopologies) {
logger.V(5).Info("PV was created or updated, which might make the pod schedulable", "pod", klog.KObj(pod), "PV", klog.KObj(modifiedPV))
return framework.Queue, nil
}
logger.V(5).Info("PV was created or updated, but it doesn't make this pod schedulable", "pod", klog.KObj(pod), "PV", klog.KObj(modifiedPV))
return framework.QueueSkip, nil
}
// getPVTopologiesFromPV retrieves pvTopology from a given PV and returns the array
func (pl *VolumeZone) getPVTopologiesFromPV(logger klog.Logger, pv *v1.PersistentVolume) []pvTopology {
podPVTopologies := make([]pvTopology, 0)
@ -471,38 +383,6 @@ func (pl *VolumeZone) getPVTopologiesFromPV(logger klog.Logger, pv *v1.Persisten
return podPVTopologies
}
// getPVTopologiesAndSort checks whether target PV is bound to pod's PVC.
// If PV is bound to pod's PVC, get PVTopologies and Sort.
func (pl *VolumeZone) getPVTopologiesAndSort(logger klog.Logger, pod *v1.Pod, pv *v1.PersistentVolume) ([]pvTopology, bool) {
isPodBoundToPV := false
podPVTopologies := make([]pvTopology, 0)
pvcNames := pl.getPersistentVolumeClaimNameFromPod(pod)
for _, pvcName := range pvcNames {
pvc, err := pl.pvcLister.PersistentVolumeClaims(pod.Namespace).Get(pvcName)
if err != nil {
logger.Error(err, "unexpected error during getting PVC", "Namespace", pod.Namespace, "pvc", pvcName)
continue
}
// If target PV is bound to pod's PVC, get PVTopologies
pvName := pvc.Spec.VolumeName
if pvName == pv.Name {
isPodBoundToPV = true
podPVTopologies = append(podPVTopologies, pl.getPVTopologiesFromPV(logger, pv)...)
// One PV is bound to one PVC. One PV can't be bound to more than one PVC.
break
}
}
// Sort PVTopologies based on key order
sort.Slice(podPVTopologies, func(i, j int) bool {
// One PV is bound to one PVC. One PV can't be bound to more than one PVC.
// That's why we don't use pvName here. We use key for sorting podPVTopologies.
return podPVTopologies[i].key > podPVTopologies[j].key
})
return podPVTopologies, isPodBoundToPV
}
// New initializes a new plugin and returns it.
func New(_ context.Context, _ runtime.Object, handle framework.Handle) (framework.Plugin, error) {
informerFactory := handle.SharedInformerFactory()

View File

@ -541,244 +541,6 @@ func TestWithBinding(t *testing.T) {
}
}
func TestIsSchedulableAfterStorageClassAdded(t *testing.T) {
var modeWait = storagev1.VolumeBindingWaitForFirstConsumer
pvLister := tf.PersistentVolumeLister{
{
ObjectMeta: metav1.ObjectMeta{Name: "Vol_1", Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-a"}},
},
}
pvcLister := tf.PersistentVolumeClaimLister{
{
ObjectMeta: metav1.ObjectMeta{Name: "PVC_1", Namespace: "default"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "Vol_1"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "PVC_2", Namespace: "default"},
Spec: v1.PersistentVolumeClaimSpec{StorageClassName: ptr.To("SC_2")},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "PVC_3", Namespace: "default"},
Spec: v1.PersistentVolumeClaimSpec{StorageClassName: ptr.To("SC_3")},
},
}
scLister := tf.StorageClassLister{
{
ObjectMeta: metav1.ObjectMeta{Name: "SC_1", Namespace: "default"},
},
}
testcases := map[string]struct {
pod *v1.Pod
oldObj, newObj interface{}
expectedHint framework.QueueingHint
expectedErr bool
}{
"backoff-wrong-new-object": {
pod: createPodWithVolume("pod_1", "PVC_1"),
newObj: "not-a-storageclass",
expectedHint: framework.Queue,
expectedErr: true,
},
"pvc-was-not-bound-to-sc": {
pod: createPodWithVolume("pod_1", "PVC_1"),
newObj: &storagev1.StorageClass{
ObjectMeta: metav1.ObjectMeta{Name: "SC_1"},
},
expectedHint: framework.QueueSkip,
},
"pvc-was-bound-to-different-sc": {
pod: createPodWithVolume("pod_1", "PVC_2"),
newObj: &storagev1.StorageClass{
ObjectMeta: metav1.ObjectMeta{Name: "SC_1"},
},
expectedHint: framework.QueueSkip,
},
"pvc-was-bound-to-added-sc-but-not-wait-mode": {
pod: createPodWithVolume("pod_1", "PVC_3"),
newObj: &storagev1.StorageClass{
ObjectMeta: metav1.ObjectMeta{Name: "SC_3"},
},
expectedHint: framework.QueueSkip,
},
"pvc-was-bound-to-added-sc-and-wait-mode": {
pod: createPodWithVolume("pod_1", "PVC_3"),
newObj: &storagev1.StorageClass{
ObjectMeta: metav1.ObjectMeta{Name: "SC_3"},
VolumeBindingMode: &modeWait,
},
expectedHint: framework.Queue,
},
}
for name, tc := range testcases {
t.Run(name, func(t *testing.T) {
logger, _ := ktesting.NewTestContext(t)
p := &VolumeZone{
pvLister,
pvcLister,
scLister,
}
got, err := p.isSchedulableAfterStorageClassAdded(logger, tc.pod, tc.oldObj, tc.newObj)
if err != nil && !tc.expectedErr {
t.Errorf("unexpected error: %v", err)
}
if got != tc.expectedHint {
t.Errorf("isSchedulableAfterStorageClassAdded() = %v, want %v", got, tc.expectedHint)
}
})
}
}
func TestIsSchedulableAfterNodeChange(t *testing.T) {
pvLister := tf.PersistentVolumeLister{
{
ObjectMeta: metav1.ObjectMeta{Name: "Vol_1", Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-a"}},
},
}
pvcLister := tf.PersistentVolumeClaimLister{
{
ObjectMeta: metav1.ObjectMeta{Name: "PVC_1", Namespace: "default"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "Vol_1"},
},
}
testcases := map[string]struct {
pod *v1.Pod
oldObj, newObj interface{}
expectedHint framework.QueueingHint
expectedErr bool
}{
"backoff-wrong-new-object": {
pod: createPodWithVolume("pod_1", "PVC_1"),
newObj: "not-a-node",
expectedHint: framework.Queue,
expectedErr: true,
},
"backoff-wrong-old-object": {
pod: createPodWithVolume("pod_1", "PVC_1"),
oldObj: "not-a-node",
newObj: st.MakeNode().Obj(),
expectedHint: framework.Queue,
expectedErr: true,
},
"pvcName-is-null-and-getPVbyPod-return-unschedulable": {
pod: createPodWithVolume("pod_1", ""),
oldObj: st.MakeNode().Obj(),
newObj: st.MakeNode().Obj(),
expectedHint: framework.Queue,
expectedErr: true,
},
"new-node-was-added-and-matched-labels": {
pod: createPodWithVolume("pod_1", "PVC_1"),
newObj: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "host1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-a"},
},
},
expectedHint: framework.Queue,
},
"new-node-was-added-but-didn't-match-key": {
pod: createPodWithVolume("pod_1", "PVC_1"),
newObj: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "host1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-b"},
},
},
expectedHint: framework.QueueSkip,
},
"non-match-node-was-updated-and-matched-labels": {
pod: createPodWithVolume("pod_1", "PVC_1"),
oldObj: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "host1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-b"},
},
},
newObj: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "host1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-a"},
},
},
expectedHint: framework.Queue,
},
"non-match-node-was-updated-but-didn't-match-key": {
pod: createPodWithVolume("pod_1", "PVC_1"),
oldObj: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "host1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-b"},
},
},
newObj: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "host1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-c"},
},
},
expectedHint: framework.QueueSkip,
},
"matched-node-was-updated-and-matched-labels": {
pod: createPodWithVolume("pod_1", "PVC_1"),
oldObj: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "host1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-a"},
},
},
newObj: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "host1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-a"},
},
},
expectedHint: framework.QueueSkip,
},
"matched-node-was-updated-but-didn't-match-key": {
pod: createPodWithVolume("pod_1", "PVC_1"),
oldObj: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "host1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-a"},
},
},
newObj: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "host1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-b"},
},
},
expectedHint: framework.QueueSkip,
},
}
for name, tc := range testcases {
t.Run(name, func(t *testing.T) {
logger, _ := ktesting.NewTestContext(t)
p := &VolumeZone{
pvLister,
pvcLister,
nil,
}
got, err := p.isSchedulableAfterNodeChange(logger, tc.pod, tc.oldObj, tc.newObj)
if err != nil && !tc.expectedErr {
t.Errorf("unexpected error: %v", err)
}
if got != tc.expectedHint {
t.Errorf("isSchedulableAfterNodeChange() = %v, want %v", got, tc.expectedHint)
}
})
}
}
func TestIsSchedulableAfterPersistentVolumeClaimAdded(t *testing.T) {
var modeWait = storagev1.VolumeBindingWaitForFirstConsumer
pvLister := tf.PersistentVolumeLister{
@ -930,151 +692,6 @@ func TestIsSchedulableAfterPersistentVolumeClaimAdded(t *testing.T) {
}
}
func TestIsSchedulableAfterPersistentVolumeChange(t *testing.T) {
pvLister := tf.PersistentVolumeLister{
{
ObjectMeta: metav1.ObjectMeta{Name: "Vol_2", Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-b"}},
},
}
pvcLister := tf.PersistentVolumeClaimLister{
{
ObjectMeta: metav1.ObjectMeta{Name: "PVC_1", Namespace: "default"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "Vol_1"},
},
}
testcases := map[string]struct {
pod *v1.Pod
oldObj, newObj interface{}
expectedHint framework.QueueingHint
expectedErr bool
}{
"backoff-wrong-new-object": {
pod: createPodWithVolume("pod_1", "PVC_1"),
newObj: "not-a-pv",
expectedHint: framework.Queue,
expectedErr: true,
},
"backoff-wrong-old-object": {
pod: createPodWithVolume("pod_1", "PVC_1"),
oldObj: "not-a-pv",
newObj: st.MakePersistentVolume().Name("Vol_1").Obj(),
expectedHint: framework.Queue,
expectedErr: true,
},
"pv-was-added-but-pod's-pvcName-is-null": {
pod: st.MakePod().Name("pod_1").Namespace("default").Obj(),
newObj: &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: "Vol_1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-b"},
},
},
expectedHint: framework.QueueSkip,
},
"new-pv-was-added-and-bound-to-pod's-pvc": {
pod: createPodWithVolume("pod_1", "PVC_1"),
newObj: &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: "Vol_1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-b"},
},
},
expectedHint: framework.Queue,
},
"new-pv-was-added-and-bound-to-pod's-pvc-but-doesn't-have-labels": {
pod: createPodWithVolume("pod_1", "PVC_1"),
newObj: &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: "Vol_1",
},
},
expectedHint: framework.QueueSkip,
},
"new-pv-was-added-but-not-bound-to-pod's-pvc": {
pod: createPodWithVolume("pod_1", "PVC_1"),
newObj: &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: "Vol_2",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-b"},
},
},
expectedHint: framework.QueueSkip,
},
"pv-was-updated-and-changed-key": {
pod: createPodWithVolume("pod_1", "PVC_1"),
oldObj: &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: "Vol_1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-a"},
},
},
newObj: &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: "Vol_1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-b"},
},
},
expectedHint: framework.Queue,
},
"pv-was-updated-and-added-label": {
pod: createPodWithVolume("pod_1", "PVC_1"),
oldObj: &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: "Vol_1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-a"},
},
},
newObj: &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: "Vol_1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-a",
v1.LabelTopologyZone: "zone"},
},
},
expectedHint: framework.Queue,
},
"pv-was-updated-and-changed-labels-order": {
pod: createPodWithVolume("pod_1", "PVC_1"),
oldObj: &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: "Vol_1",
Labels: map[string]string{v1.LabelFailureDomainBetaZone: "us-west1-a",
v1.LabelTopologyZone: "zone"},
},
},
newObj: &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: "Vol_1",
Labels: map[string]string{v1.LabelTopologyZone: "zone",
v1.LabelFailureDomainBetaZone: "us-west1-a"},
},
},
expectedHint: framework.QueueSkip,
},
}
for name, tc := range testcases {
t.Run(name, func(t *testing.T) {
logger, _ := ktesting.NewTestContext(t)
p := &VolumeZone{
pvLister,
pvcLister,
nil,
}
got, err := p.isSchedulableAfterPersistentVolumeChange(logger, tc.pod, tc.oldObj, tc.newObj)
if err != nil && !tc.expectedErr {
t.Errorf("unexpected error: %v", err)
}
if got != tc.expectedHint {
t.Errorf("isSchedulableAfterPersistentVolumeChange() = %v, want %v", got, tc.expectedHint)
}
})
}
}
func BenchmarkVolumeZone(b *testing.B) {
tests := []struct {
Name string