diff --git a/plugin/pkg/scheduler/core/equivalence_cache.go b/plugin/pkg/scheduler/core/equivalence_cache.go index 9977fe18d30..fafe1298a82 100644 --- a/plugin/pkg/scheduler/core/equivalence_cache.go +++ b/plugin/pkg/scheduler/core/equivalence_cache.go @@ -188,6 +188,23 @@ func (ec *EquivalenceCache) InvalidateCachedPredicateItemForPodAdd(pod *v1.Pod, // GeneralPredicates: will always be affected by adding a new pod invalidPredicates := sets.NewString("GeneralPredicates") + + // MaxPDVolumeCountPredicate: we check the volumes of pod to make decision. + for _, vol := range pod.Spec.Volumes { + if vol.PersistentVolumeClaim != nil { + invalidPredicates.Insert("MaxEBSVolumeCount", "MaxGCEPDVolumeCount", "MaxAzureDiskVolumeCount") + } else { + if vol.AWSElasticBlockStore != nil { + invalidPredicates.Insert("MaxEBSVolumeCount") + } + if vol.GCEPersistentDisk != nil { + invalidPredicates.Insert("MaxGCEPDVolumeCount") + } + if vol.AzureDisk != nil { + invalidPredicates.Insert("MaxAzureDiskVolumeCount") + } + } + } ec.InvalidateCachedPredicateItem(nodeName, invalidPredicates) } diff --git a/plugin/pkg/scheduler/factory/factory.go b/plugin/pkg/scheduler/factory/factory.go index 764f449ccc4..0c3449590b1 100644 --- a/plugin/pkg/scheduler/factory/factory.go +++ b/plugin/pkg/scheduler/factory/factory.go @@ -71,11 +71,11 @@ const ( ) var ( - serviceAffinitySet = sets.NewString("ServiceAffinity") - maxPDVolumeCountPredicateSet = sets.NewString("MaxPDVolumeCountPredicate") - matchInterPodAffinitySet = sets.NewString("MatchInterPodAffinity") - generalPredicatesSets = sets.NewString("GeneralPredicates") - noDiskConflictSet = sets.NewString("NoDiskConflict") + serviceAffinitySet = sets.NewString("ServiceAffinity") + matchInterPodAffinitySet = sets.NewString("MatchInterPodAffinity") + generalPredicatesSets = sets.NewString("GeneralPredicates") + noDiskConflictSet = sets.NewString("NoDiskConflict") + maxPDVolumeCountPredicateKeys = []string{"MaxGCEPDVolumeCount", 
"MaxAzureDiskVolumeCount", "MaxEBSVolumeCount"} ) // configFactory is the default implementation of the scheduler.Configurator interface. @@ -384,7 +384,11 @@ func (c *configFactory) onPvDelete(obj interface{}) { } func (c *configFactory) invalidatePredicatesForPv(pv *v1.PersistentVolume) { - invalidPredicates := sets.NewString("MaxPDVolumeCountPredicate") + // You could have a PVC that points to a PV, but the PV object doesn't exist. + // So when the PV object gets added, we can recount. + invalidPredicates := sets.NewString() + + // PV types which impact MaxPDVolumeCountPredicate if pv.Spec.AWSElasticBlockStore != nil { invalidPredicates.Insert("MaxEBSVolumeCount") } @@ -395,6 +399,14 @@ func (c *configFactory) invalidatePredicatesForPv(pv *v1.PersistentVolume) { invalidPredicates.Insert("MaxAzureDiskVolumeCount") } + // If PV contains zone related label, it may impact cached NoVolumeZoneConflict + for k := range pv.ObjectMeta.Labels { + if k == kubeletapis.LabelZoneFailureDomain || k == kubeletapis.LabelZoneRegion { + invalidPredicates.Insert("NoVolumeZoneConflict") + break + } + } + if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) { // Add/delete impacts the available PVs to choose from invalidPredicates.Insert(predicates.CheckVolumeBinding) @@ -458,24 +470,34 @@ func (c *configFactory) onPvcDelete(obj interface{}) { } func (c *configFactory) invalidatePredicatesForPvc(pvc *v1.PersistentVolumeClaim) { - if pvc.Spec.VolumeName != "" { - c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(maxPDVolumeCountPredicateSet) + // We need to do this here because the ecache uses PVC uid as part of equivalence hash of pod + // The binded volume type may change + invalidPredicates := sets.NewString(maxPDVolumeCountPredicateKeys...) 
+ // The bound volume's label may change + invalidPredicates.Insert("NoVolumeZoneConflict") + + if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) { + // Add/delete impacts the available PVs to choose from + invalidPredicates.Insert(predicates.CheckVolumeBinding) } + c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(invalidPredicates) } func (c *configFactory) invalidatePredicatesForPvcUpdate(old, new *v1.PersistentVolumeClaim) { invalidPredicates := sets.NewString() - if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) { - if old.Spec.VolumeName != new.Spec.VolumeName { + if old.Spec.VolumeName != new.Spec.VolumeName { + if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) { // PVC volume binding has changed invalidPredicates.Insert(predicates.CheckVolumeBinding) } + // The bound volume type may change + invalidPredicates.Insert(maxPDVolumeCountPredicateKeys...) + // The bound volume's label may change + invalidPredicates.Insert("NoVolumeZoneConflict") } - if invalidPredicates.Len() > 0 { - c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(invalidPredicates) - } + c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(invalidPredicates) } func (c *configFactory) onServiceAdd(obj interface{}) { @@ -541,7 +563,7 @@ func (c *configFactory) addPodToCache(obj interface{}) { c.podQueue.AssignedPodAdded(pod) // NOTE: Updating equivalence cache of addPodToCache has been - // handled optimistically in InvalidateCachedPredicateItemForPodAdd. + // handled optimistically in: plugin/pkg/scheduler/scheduler.go#assume() } func (c *configFactory) updatePodInCache(oldObj, newObj interface{}) {