From bc3c07091b3501ac52c7c322f246df5e36815c20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=A8=E6=9C=B1=20=C2=B7=20Kiki?= Date: Mon, 22 Jul 2024 16:20:34 +0800 Subject: [PATCH] Fix a bug where the target pod doesn't become schedulable within 5 minutes when a deleted pod uses the same PVC with the ReadWriteOncePod access mode. (#126263) Co-authored-by: Kensei Nakada --- .../volumerestrictions/volume_restrictions.go | 22 ++++++++++++++++++- .../volume_restrictions_test.go | 16 ++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/pkg/scheduler/framework/plugins/volumerestrictions/volume_restrictions.go b/pkg/scheduler/framework/plugins/volumerestrictions/volume_restrictions.go index fd578b84240..192c65e8921 100644 --- a/pkg/scheduler/framework/plugins/volumerestrictions/volume_restrictions.go +++ b/pkg/scheduler/framework/plugins/volumerestrictions/volume_restrictions.go @@ -362,7 +362,6 @@ func (pl *VolumeRestrictions) isSchedulableAfterPersistentVolumeClaimAdded(logge // isSchedulableAfterPodDeleted is invoked whenever a pod deleted, // It checks whether the deleted pod will conflict with volumes of other pods on the same node -// TODO If we observe good throughput, we will add a check for conflicts between the deleted Pod and the readWriteOncePodPVC of the current Pod. func (pl *VolumeRestrictions) isSchedulableAfterPodDeleted(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) { deletedPod, _, err := util.As[*v1.Pod](oldObj, newObj) if err != nil { @@ -375,9 +374,30 @@ func (pl *VolumeRestrictions) isSchedulableAfterPodDeleted(logger klog.Logger, p nodeInfo := framework.NewNodeInfo(deletedPod) if !satisfyVolumeConflicts(pod, nodeInfo) { + logger.V(5).Info("Pod with the volume that the target pod requires was deleted, which might make this pod schedulable", "pod", klog.KObj(pod), "deletedPod", klog.KObj(deletedPod)) return framework.Queue, nil } + // Return Queue if a deleted pod uses the same PVC since the pod may be unschedulable due to the ReadWriteOncePod access mode of the PVC. + // + // For now, we don't actually fetch PVC and check the access mode because that operation could be expensive. + // Once the observability around QHint is established, + // we may want to do that depending on how much the operation would impact the QHint latency negatively. + // https://github.com/kubernetes/kubernetes/issues/124566 + claims := sets.New[string]() + for _, volume := range pod.Spec.Volumes { + if volume.PersistentVolumeClaim != nil { + claims.Insert(volume.PersistentVolumeClaim.ClaimName) + } + } + for _, volume := range deletedPod.Spec.Volumes { + if volume.PersistentVolumeClaim != nil && claims.Has(volume.PersistentVolumeClaim.ClaimName) { + logger.V(5).Info("Pod with the same PVC that the target pod requires was deleted, which might make this pod schedulable", "pod", klog.KObj(pod), "deletedPod", klog.KObj(deletedPod)) + return framework.Queue, nil + } + } + + logger.V(5).Info("An irrelevant Pod was deleted, which doesn't make this pod schedulable", "pod", klog.KObj(pod), "deletedPod", klog.KObj(deletedPod)) return framework.QueueSkip, nil } diff --git a/pkg/scheduler/framework/plugins/volumerestrictions/volume_restrictions_test.go b/pkg/scheduler/framework/plugins/volumerestrictions/volume_restrictions_test.go index 1c2877aed75..6ea2144df33 100644 --- a/pkg/scheduler/framework/plugins/volumerestrictions/volume_restrictions_test.go +++ b/pkg/scheduler/framework/plugins/volumerestrictions/volume_restrictions_test.go @@ -640,6 +640,22 @@ func Test_isSchedulableAfterPodDeleted(t *testing.T) { expectedHint: framework.QueueSkip, expectedErr: false, }, + "queue-has-same-claim": { + pod: st.MakePod().Name("pod1").PVC("claim-rwop").Obj(), + oldObj: st.MakePod().Name("pod2").PVC("claim-rwop").Obj(), + existingPods: []*v1.Pod{}, + existingPVC: &v1.PersistentVolumeClaim{}, + expectedHint: framework.Queue, + expectedErr: false, + }, + "skip-no-same-claim": { + pod: st.MakePod().Name("pod1").PVC("claim-1-rwop").Obj(), + oldObj: st.MakePod().Name("pod2").PVC("claim-2-rwop").Obj(), + existingPods: []*v1.Pod{}, + existingPVC: &v1.PersistentVolumeClaim{}, + expectedHint: framework.QueueSkip, + expectedErr: false, + }, } for name, tc := range testcases {