// Review notes for this patch (Go, scheduler volumebinding plugin).
// NOTE(review): these notes sit before the first "diff --git" header; confirm
// that the patch tool in use skips leading text before applying.
//
// volume_binding.go
//   - EventsToRegister: the PersistentVolumeClaim Add|Update event now carries
//     QueueingHintFn pl.isSchedulableAfterPersistentVolumeClaimChange. Blank
//     lines are also added between the event groups.
//   - isSchedulableAfterPersistentVolumeClaimChange (new):
//       * returns (Queue, err) when util.As returns an error for oldObj/newObj;
//       * returns QueueSkip when the PVC's namespace differs from the pod's;
//       * returns Queue when one of the pod's volumes names the PVC, either via
//         PersistentVolumeClaim.ClaimName or via the name that
//         ephemeral.VolumeClaimName(pod, &vol) yields for an ephemeral volume;
//       * returns QueueSkip otherwise.
//     The old object returned by util.As is discarded; only the new PVC is used.
//
// volume_binding_test.go
//   - TestIsSchedulableAfterPersistentVolumeClaimChange (new): table-driven,
//     seven cases. The "type conversion error" case leaves pod unset, so it
//     depends on the conversion check returning before pod is dereferenced.
//
// NOTE(review): the test reports a wantErr mismatch with %q. If err is nil on
// that path the message will not show a readable value; %v may be clearer.
// NOTE(review): "kube-schduler" and "plugins has" in the CSINode TODO are on an
// unchanged context line, so any spelling fix belongs in the target file.
diff --git a/pkg/scheduler/framework/plugins/volumebinding/volume_binding.go b/pkg/scheduler/framework/plugins/volumebinding/volume_binding.go
index b600aea7bbc..2f60d25f571 100644
--- a/pkg/scheduler/framework/plugins/volumebinding/volume_binding.go
+++ b/pkg/scheduler/framework/plugins/volumebinding/volume_binding.go
@@ -99,9 +99,11 @@ func (pl *VolumeBinding) EventsToRegister() []framework.ClusterEventWithHint {
 		// (e.g., allowedTopologies, volumeBindingMode), and hence may become
 		// schedulable upon StorageClass Add or Update events.
 		{Event: framework.ClusterEvent{Resource: framework.StorageClass, ActionType: framework.Add | framework.Update}},
+
 		// We bind PVCs with PVs, so any changes may make the pods schedulable.
-		{Event: framework.ClusterEvent{Resource: framework.PersistentVolumeClaim, ActionType: framework.Add | framework.Update}},
+		{Event: framework.ClusterEvent{Resource: framework.PersistentVolumeClaim, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterPersistentVolumeClaimChange},
 		{Event: framework.ClusterEvent{Resource: framework.PersistentVolume, ActionType: framework.Add | framework.Update}},
+
 		// Pods may fail to find available PVs because the node labels do not
 		// match the storage class's allowed topologies or PV's node affinity.
 		// A new or updated node may make pods schedulable.
@@ -115,9 +117,11 @@ func (pl *VolumeBinding) EventsToRegister() []framework.ClusterEventWithHint {
 		// We can remove UpdateNodeTaint when we remove the preCheck feature.
 		// See: https://github.com/kubernetes/kubernetes/issues/110175
 		{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeLabel | framework.UpdateNodeTaint}},
+
 		// We rely on CSI node to translate in-tree PV to CSI.
 		// TODO: kube-schduler will unregister the CSINode events once all the volume plugins has completed their CSI migration.
 		{Event: framework.ClusterEvent{Resource: framework.CSINode, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterCSINodeChange},
+
 		// When CSIStorageCapacity is enabled, pods may become schedulable
 		// on CSI driver & storage capacity changes.
 		{Event: framework.ClusterEvent{Resource: framework.CSIDriver, ActionType: framework.Add | framework.Update}},
@@ -151,6 +155,46 @@ func (pl *VolumeBinding) isSchedulableAfterCSINodeChange(logger klog.Logger, pod
 	return framework.QueueSkip, nil
 }
 
+func (pl *VolumeBinding) isSchedulableAfterPersistentVolumeClaimChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
+	_, newPVC, err := util.As[*v1.PersistentVolumeClaim](oldObj, newObj)
+	if err != nil {
+		return framework.Queue, err
+	}
+
+	logger = klog.LoggerWithValues(
+		logger,
+		"Pod", klog.KObj(pod),
+		"PersistentVolumeClaim", klog.KObj(newPVC),
+	)
+
+	if pod.Namespace != newPVC.Namespace {
+		logger.V(5).Info("PersistentVolumeClaim was created or updated, but it doesn't make this pod schedulable because the PVC belongs to a different namespace")
+		return framework.QueueSkip, nil
+	}
+
+	for _, vol := range pod.Spec.Volumes {
+		var pvcName string
+		switch {
+		case vol.PersistentVolumeClaim != nil:
+			pvcName = vol.PersistentVolumeClaim.ClaimName
+		case vol.Ephemeral != nil:
+			pvcName = ephemeral.VolumeClaimName(pod, &vol)
+		default:
+			continue
+		}
+
+		if pvcName == newPVC.Name {
+			// Return Queue because, in this case,
+			// all PVC creations and almost all PVC updates could make the Pod schedulable.
+			logger.V(5).Info("PersistentVolumeClaim the pod requires was created or updated, potentially making the target Pod schedulable")
+			return framework.Queue, nil
+		}
+	}
+
+	logger.V(5).Info("PersistentVolumeClaim was created or updated, but it doesn't make this pod schedulable")
+	return framework.QueueSkip, nil
+}
+
 // podHasPVCs returns 2 values:
 // - the first one to denote if the given "pod" has any PVC defined.
 // - the second one to return any error if the requested PVC is illegal.
diff --git a/pkg/scheduler/framework/plugins/volumebinding/volume_binding_test.go b/pkg/scheduler/framework/plugins/volumebinding/volume_binding_test.go
index fa3fe7a6e2c..232db2d56aa 100644
--- a/pkg/scheduler/framework/plugins/volumebinding/volume_binding_test.go
+++ b/pkg/scheduler/framework/plugins/volumebinding/volume_binding_test.go
@@ -996,3 +996,100 @@ func TestIsSchedulableAfterCSINodeChange(t *testing.T) {
 		})
 	}
 }
+
+func TestIsSchedulableAfterPersistentVolumeClaimChange(t *testing.T) {
+	table := []struct {
+		name    string
+		pod     *v1.Pod
+		oldPVC  interface{}
+		newPVC  interface{}
+		wantErr bool
+		expect  framework.QueueingHint
+	}{
+		{
+			name:    "pod has no pvc or ephemeral volumes",
+			pod:     makePod("pod-a").withEmptyDirVolume().Pod,
+			oldPVC:  makePVC("pvc-b", "sc-a").PersistentVolumeClaim,
+			newPVC:  makePVC("pvc-b", "sc-a").PersistentVolumeClaim,
+			wantErr: false,
+			expect:  framework.QueueSkip,
+		},
+		{
+			name: "pvc with the same name as the one used by the pod in a different namespace is modified",
+			pod: makePod("pod-a").
+				withNamespace("ns-a").
+				withPVCVolume("pvc-a", "").
+				withPVCVolume("pvc-b", "").
+				Pod,
+			oldPVC:  nil,
+			newPVC:  makePVC("pvc-b", "").PersistentVolumeClaim,
+			wantErr: false,
+			expect:  framework.QueueSkip,
+		},
+		{
+			name: "pod has no pvc that is being modified",
+			pod: makePod("pod-a").
+				withPVCVolume("pvc-a", "").
+				withPVCVolume("pvc-c", "").
+				Pod,
+			oldPVC:  makePVC("pvc-b", "").PersistentVolumeClaim,
+			newPVC:  makePVC("pvc-b", "").PersistentVolumeClaim,
+			wantErr: false,
+			expect:  framework.QueueSkip,
+		},
+		{
+			name: "pod has no generic ephemeral volume that is being modified",
+			pod: makePod("pod-a").
+				withGenericEphemeralVolume("ephemeral-a").
+				withGenericEphemeralVolume("ephemeral-c").
+				Pod,
+			oldPVC:  makePVC("pod-a-ephemeral-b", "").PersistentVolumeClaim,
+			newPVC:  makePVC("pod-a-ephemeral-b", "").PersistentVolumeClaim,
+			wantErr: false,
+			expect:  framework.QueueSkip,
+		},
+		{
+			name: "pod has the pvc that is being modified",
+			pod: makePod("pod-a").
+				withPVCVolume("pvc-a", "").
+				withPVCVolume("pvc-b", "").
+				Pod,
+			oldPVC:  makePVC("pvc-b", "").PersistentVolumeClaim,
+			newPVC:  makePVC("pvc-b", "").PersistentVolumeClaim,
+			wantErr: false,
+			expect:  framework.Queue,
+		},
+		{
+			name: "pod has the generic ephemeral volume that is being modified",
+			pod: makePod("pod-a").
+				withGenericEphemeralVolume("ephemeral-a").
+				withGenericEphemeralVolume("ephemeral-b").
+				Pod,
+			oldPVC:  makePVC("pod-a-ephemeral-b", "").PersistentVolumeClaim,
+			newPVC:  makePVC("pod-a-ephemeral-b", "").PersistentVolumeClaim,
+			wantErr: false,
+			expect:  framework.Queue,
+		},
+		{
+			name:    "type conversion error",
+			oldPVC:  new(struct{}),
+			newPVC:  new(struct{}),
+			wantErr: true,
+			expect:  framework.Queue,
+		},
+	}
+
+	for _, item := range table {
+		t.Run(item.name, func(t *testing.T) {
+			pl := &VolumeBinding{}
+			logger, _ := ktesting.NewTestContext(t)
+			qhint, err := pl.isSchedulableAfterPersistentVolumeClaimChange(logger, item.pod, item.oldPVC, item.newPVC)
+			if (err != nil) != item.wantErr {
+				t.Errorf("isSchedulableAfterPersistentVolumeClaimChange failed - got: %q", err)
+			}
+			if qhint != item.expect {
+				t.Errorf("QHint does not match: %v, want: %v", qhint, item.expect)
+			}
+		})
+	}
+}