Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-23 19:56:01 +00:00)
feat(scheduling): move csi volume predicates to csi filter plugin
Commit 291a7bb164 (parent 13133857af)
@@ -9,7 +9,6 @@ load(
go_library(
    name = "go_default_library",
    srcs = [
        "csi_volume_predicate.go",
        "error.go",
        "metadata.go",
        "predicates.go",
@@ -34,7 +33,6 @@ go_library(
        "//staging/src/k8s.io/client-go/listers/core/v1:go_default_library",
        "//staging/src/k8s.io/client-go/listers/storage/v1:go_default_library",
        "//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
        "//staging/src/k8s.io/csi-translation-lib:go_default_library",
        "//staging/src/k8s.io/csi-translation-lib/plugins:go_default_library",
        "//vendor/k8s.io/klog:go_default_library",
    ],
@@ -43,7 +41,6 @@ go_library(
go_test(
    name = "go_default_test",
    srcs = [
        "csi_volume_predicate_test.go",
        "max_attachable_volume_predicate_test.go",
        "metadata_test.go",
        "predicates_test.go",
@@ -1,285 +0,0 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package predicates

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
    storagev1 "k8s.io/api/storage/v1"
    "k8s.io/apimachinery/pkg/util/rand"
    corelisters "k8s.io/client-go/listers/core/v1"
    storagelisters "k8s.io/client-go/listers/storage/v1"
    csitrans "k8s.io/csi-translation-lib"
    "k8s.io/klog"
    v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
    schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
    volumeutil "k8s.io/kubernetes/pkg/volume/util"
)

// InTreeToCSITranslator contains the methods required to check migratable status
// and perform translations from in-tree PVs to CSI.
type InTreeToCSITranslator interface {
    IsPVMigratable(pv *v1.PersistentVolume) bool
    IsMigratableIntreePluginByName(inTreePluginName string) bool
    GetInTreePluginNameFromSpec(pv *v1.PersistentVolume, vol *v1.Volume) (string, error)
    GetCSINameFromInTreeName(pluginName string) (string, error)
    TranslateInTreePVToCSI(pv *v1.PersistentVolume) (*v1.PersistentVolume, error)
}

// CSIMaxVolumeLimitChecker defines the predicate needed for counting CSI volumes.
type CSIMaxVolumeLimitChecker struct {
    csiNodeLister storagelisters.CSINodeLister
    pvLister      corelisters.PersistentVolumeLister
    pvcLister     corelisters.PersistentVolumeClaimLister
    scLister      storagelisters.StorageClassLister

    randomVolumeIDPrefix string

    translator InTreeToCSITranslator
}

// NewCSIMaxVolumeLimitPredicate returns a predicate for counting CSI volumes.
func NewCSIMaxVolumeLimitPredicate(
    csiNodeLister storagelisters.CSINodeLister, pvLister corelisters.PersistentVolumeLister, pvcLister corelisters.PersistentVolumeClaimLister, scLister storagelisters.StorageClassLister) FitPredicate {
    c := &CSIMaxVolumeLimitChecker{
        csiNodeLister:        csiNodeLister,
        pvLister:             pvLister,
        pvcLister:            pvcLister,
        scLister:             scLister,
        randomVolumeIDPrefix: rand.String(32),
        translator:           csitrans.New(),
    }
    return c.attachableLimitPredicate
}

func getVolumeLimits(nodeInfo *schedulernodeinfo.NodeInfo, csiNode *storagev1.CSINode) map[v1.ResourceName]int64 {
    // TODO: stop getting values from Node object in v1.18
    nodeVolumeLimits := nodeInfo.VolumeLimits()
    if csiNode != nil {
        for i := range csiNode.Spec.Drivers {
            d := csiNode.Spec.Drivers[i]
            if d.Allocatable != nil && d.Allocatable.Count != nil {
                // TODO: drop GetCSIAttachLimitKey once we don't get values from Node object (v1.18)
                k := v1.ResourceName(volumeutil.GetCSIAttachLimitKey(d.Name))
                nodeVolumeLimits[k] = int64(*d.Allocatable.Count)
            }
        }
    }
    return nodeVolumeLimits
}

func (c *CSIMaxVolumeLimitChecker) attachableLimitPredicate(
    pod *v1.Pod, meta Metadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
    // If the new pod doesn't have any volumes attached to it, the predicate will always be true.
    if len(pod.Spec.Volumes) == 0 {
        return true, nil, nil
    }

    node := nodeInfo.Node()
    if node == nil {
        return false, nil, fmt.Errorf("node not found")
    }

    // If CSINode doesn't exist, the predicate may read the limits from the Node object.
    csiNode, err := c.csiNodeLister.Get(node.Name)
    if err != nil {
        // TODO: return the error once CSINode is created by default (2 releases)
        klog.V(5).Infof("Could not get a CSINode object for the node: %v", err)
    }

    newVolumes := make(map[string]string)
    if err := c.filterAttachableVolumes(csiNode, pod.Spec.Volumes, pod.Namespace, newVolumes); err != nil {
        return false, nil, err
    }

    // If the pod doesn't have any new CSI volumes, the predicate will always be true.
    if len(newVolumes) == 0 {
        return true, nil, nil
    }

    // If the node doesn't have volume limits, the predicate will always be true.
    nodeVolumeLimits := getVolumeLimits(nodeInfo, csiNode)
    if len(nodeVolumeLimits) == 0 {
        return true, nil, nil
    }

    attachedVolumes := make(map[string]string)
    for _, existingPod := range nodeInfo.Pods() {
        if err := c.filterAttachableVolumes(csiNode, existingPod.Spec.Volumes, existingPod.Namespace, attachedVolumes); err != nil {
            return false, nil, err
        }
    }

    attachedVolumeCount := map[string]int{}
    for volumeUniqueName, volumeLimitKey := range attachedVolumes {
        if _, ok := newVolumes[volumeUniqueName]; ok {
            // Don't count a single volume used in multiple pods more than once.
            delete(newVolumes, volumeUniqueName)
        }
        attachedVolumeCount[volumeLimitKey]++
    }

    newVolumeCount := map[string]int{}
    for _, volumeLimitKey := range newVolumes {
        newVolumeCount[volumeLimitKey]++
    }

    for volumeLimitKey, count := range newVolumeCount {
        maxVolumeLimit, ok := nodeVolumeLimits[v1.ResourceName(volumeLimitKey)]
        if ok {
            currentVolumeCount := attachedVolumeCount[volumeLimitKey]
            if currentVolumeCount+count > int(maxVolumeLimit) {
                return false, []PredicateFailureReason{ErrMaxVolumeCountExceeded}, nil
            }
        }
    }

    return true, nil, nil
}

func (c *CSIMaxVolumeLimitChecker) filterAttachableVolumes(
    csiNode *storagev1.CSINode, volumes []v1.Volume, namespace string, result map[string]string) error {
    for _, vol := range volumes {
        // CSI volumes can only be used as persistent volumes.
        if vol.PersistentVolumeClaim == nil {
            continue
        }
        pvcName := vol.PersistentVolumeClaim.ClaimName

        if pvcName == "" {
            return fmt.Errorf("PersistentVolumeClaim had no name")
        }

        pvc, err := c.pvcLister.PersistentVolumeClaims(namespace).Get(pvcName)

        if err != nil {
            klog.V(5).Infof("Unable to look up PVC info for %s/%s", namespace, pvcName)
            continue
        }

        driverName, volumeHandle := c.getCSIDriverInfo(csiNode, pvc)
        if driverName == "" || volumeHandle == "" {
            klog.V(5).Infof("Could not find a CSI driver name or volume handle, not counting volume")
            continue
        }

        volumeUniqueName := fmt.Sprintf("%s/%s", driverName, volumeHandle)
        volumeLimitKey := volumeutil.GetCSIAttachLimitKey(driverName)
        result[volumeUniqueName] = volumeLimitKey
    }
    return nil
}

// getCSIDriverInfo returns the CSI driver name and volume ID of a given PVC.
// If the PVC is from a migrated in-tree plugin, this function will return
// the information of the CSI driver that the plugin has been migrated to.
func (c *CSIMaxVolumeLimitChecker) getCSIDriverInfo(csiNode *storagev1.CSINode, pvc *v1.PersistentVolumeClaim) (string, string) {
    pvName := pvc.Spec.VolumeName
    namespace := pvc.Namespace
    pvcName := pvc.Name

    if pvName == "" {
        klog.V(5).Infof("Persistent volume had no name for claim %s/%s", namespace, pvcName)
        return c.getCSIDriverInfoFromSC(csiNode, pvc)
    }

    pv, err := c.pvLister.Get(pvName)
    if err != nil {
        klog.V(5).Infof("Unable to look up PV info for PVC %s/%s and PV %s", namespace, pvcName, pvName)
        // If we can't fetch the PV associated with the PVC, maybe it got deleted
        // or the PVC was prebound to a PV that hasn't been created yet.
        // Fall back to using the StorageClass for volume counting.
        return c.getCSIDriverInfoFromSC(csiNode, pvc)
    }

    csiSource := pv.Spec.PersistentVolumeSource.CSI
    if csiSource == nil {
        // We make a fast path for non-CSI volumes that aren't migratable.
        if !c.translator.IsPVMigratable(pv) {
            return "", ""
        }

        pluginName, err := c.translator.GetInTreePluginNameFromSpec(pv, nil)
        if err != nil {
            klog.V(5).Infof("Unable to look up plugin name from PV spec: %v", err)
            return "", ""
        }

        if !isCSIMigrationOn(csiNode, pluginName) {
            klog.V(5).Infof("CSI Migration of plugin %s is not enabled", pluginName)
            return "", ""
        }

        csiPV, err := c.translator.TranslateInTreePVToCSI(pv)
        if err != nil {
            klog.V(5).Infof("Unable to translate in-tree volume to CSI: %v", err)
            return "", ""
        }

        if csiPV.Spec.PersistentVolumeSource.CSI == nil {
            klog.V(5).Infof("Unable to get a valid volume source for translated PV %s", pvName)
            return "", ""
        }

        csiSource = csiPV.Spec.PersistentVolumeSource.CSI
    }

    return csiSource.Driver, csiSource.VolumeHandle
}

// getCSIDriverInfoFromSC returns the CSI driver name and a random volume ID of a given PVC's StorageClass.
func (c *CSIMaxVolumeLimitChecker) getCSIDriverInfoFromSC(csiNode *storagev1.CSINode, pvc *v1.PersistentVolumeClaim) (string, string) {
    namespace := pvc.Namespace
    pvcName := pvc.Name
    scName := v1helper.GetPersistentVolumeClaimClass(pvc)

    // If the StorageClass is not set or not found, then the PVC must be using immediate binding mode
    // and hence it must be bound before scheduling. So it is safe not to count it.
    if scName == "" {
        klog.V(5).Infof("PVC %s/%s has no StorageClass", namespace, pvcName)
        return "", ""
    }

    storageClass, err := c.scLister.Get(scName)
    if err != nil {
        klog.V(5).Infof("Could not get StorageClass for PVC %s/%s: %v", namespace, pvcName, err)
        return "", ""
    }

    // We use a random prefix to avoid conflicts with volume IDs. If the PVC is bound during the execution
    // of the predicate and there is another pod on the same node that uses the same volume, then we will
    // overcount the volume and consider both volumes as different.
    volumeHandle := fmt.Sprintf("%s-%s/%s", c.randomVolumeIDPrefix, namespace, pvcName)

    provisioner := storageClass.Provisioner
    if c.translator.IsMigratableIntreePluginByName(provisioner) {
        if !isCSIMigrationOn(csiNode, provisioner) {
            klog.V(5).Infof("CSI Migration of plugin %s is not enabled", provisioner)
            return "", ""
        }

        driverName, err := c.translator.GetCSINameFromInTreeName(provisioner)
        if err != nil {
            klog.V(5).Infof("Unable to look up driver name from plugin name: %v", err)
            return "", ""
        }
        return driverName, volumeHandle
    }

    return provisioner, volumeHandle
}
@@ -1,576 +0,0 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package predicates

import (
    "fmt"
    "reflect"
    "strings"
    "testing"

    v1 "k8s.io/api/core/v1"
    storagev1 "k8s.io/api/storage/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/util/sets"
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    featuregatetesting "k8s.io/component-base/featuregate/testing"
    csilibplugins "k8s.io/csi-translation-lib/plugins"
    "k8s.io/kubernetes/pkg/features"
    fakelisters "k8s.io/kubernetes/pkg/scheduler/listers/fake"
)

const (
    ebsCSIDriverName = csilibplugins.AWSEBSDriverName
    gceCSIDriverName = csilibplugins.GCEPDDriverName

    hostpathInTreePluginName = "kubernetes.io/hostpath"
)

func TestCSIVolumeCountPredicate(t *testing.T) {
    runningPod := &v1.Pod{
        Spec: v1.PodSpec{
            Volumes: []v1.Volume{
                {
                    VolumeSource: v1.VolumeSource{
                        PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                            ClaimName: "csi-ebs.csi.aws.com-3",
                        },
                    },
                },
            },
        },
    }

    pendingVolumePod := &v1.Pod{
        Spec: v1.PodSpec{
            Volumes: []v1.Volume{
                {
                    VolumeSource: v1.VolumeSource{
                        PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                            ClaimName: "csi-4",
                        },
                    },
                },
            },
        },
    }

    // Different pod than pendingVolumePod, but using the same unbound PVC
    unboundPVCPod2 := &v1.Pod{
        Spec: v1.PodSpec{
            Volumes: []v1.Volume{
                {
                    VolumeSource: v1.VolumeSource{
                        PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                            ClaimName: "csi-4",
                        },
                    },
                },
            },
        },
    }

    missingPVPod := &v1.Pod{
        Spec: v1.PodSpec{
            Volumes: []v1.Volume{
                {
                    VolumeSource: v1.VolumeSource{
                        PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                            ClaimName: "csi-6",
                        },
                    },
                },
            },
        },
    }

    noSCPVCPod := &v1.Pod{
        Spec: v1.PodSpec{
            Volumes: []v1.Volume{
                {
                    VolumeSource: v1.VolumeSource{
                        PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                            ClaimName: "csi-5",
                        },
                    },
                },
            },
        },
    }
    gceTwoVolPod := &v1.Pod{
        Spec: v1.PodSpec{
            Volumes: []v1.Volume{
                {
                    VolumeSource: v1.VolumeSource{
                        PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                            ClaimName: "csi-pd.csi.storage.gke.io-1",
                        },
                    },
                },
                {
                    VolumeSource: v1.VolumeSource{
                        PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                            ClaimName: "csi-pd.csi.storage.gke.io-2",
                        },
                    },
                },
            },
        },
    }
    // In-tree volumes
    inTreeOneVolPod := &v1.Pod{
        Spec: v1.PodSpec{
            Volumes: []v1.Volume{
                {
                    VolumeSource: v1.VolumeSource{
                        PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                            ClaimName: "csi-kubernetes.io/aws-ebs-0",
                        },
                    },
                },
            },
        },
    }
    inTreeTwoVolPod := &v1.Pod{
        Spec: v1.PodSpec{
            Volumes: []v1.Volume{
                {
                    VolumeSource: v1.VolumeSource{
                        PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                            ClaimName: "csi-kubernetes.io/aws-ebs-1",
                        },
                    },
                },
                {
                    VolumeSource: v1.VolumeSource{
                        PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                            ClaimName: "csi-kubernetes.io/aws-ebs-2",
                        },
                    },
                },
            },
        },
    }
    // pods with matching csi driver names
    csiEBSOneVolPod := &v1.Pod{
        Spec: v1.PodSpec{
            Volumes: []v1.Volume{
                {
                    VolumeSource: v1.VolumeSource{
                        PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                            ClaimName: "csi-ebs.csi.aws.com-0",
                        },
                    },
                },
            },
        },
    }
    csiEBSTwoVolPod := &v1.Pod{
        Spec: v1.PodSpec{
            Volumes: []v1.Volume{
                {
                    VolumeSource: v1.VolumeSource{
                        PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                            ClaimName: "csi-ebs.csi.aws.com-1",
                        },
                    },
                },
                {
                    VolumeSource: v1.VolumeSource{
                        PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                            ClaimName: "csi-ebs.csi.aws.com-2",
                        },
                    },
                },
            },
        },
    }
    inTreeNonMigratableOneVolPod := &v1.Pod{
        Spec: v1.PodSpec{
            Volumes: []v1.Volume{
                {
                    VolumeSource: v1.VolumeSource{
                        PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                            ClaimName: "csi-kubernetes.io/hostpath-0",
                        },
                    },
                },
            },
        },
    }

    tests := []struct {
        newPod                *v1.Pod
        existingPods          []*v1.Pod
        filterName            string
        maxVols               int
        driverNames           []string
        fits                  bool
        test                  string
        migrationEnabled      bool
        limitSource           string
        expectedFailureReason *PredicateFailureError
    }{
        {
            newPod:       csiEBSOneVolPod,
            existingPods: []*v1.Pod{runningPod, csiEBSTwoVolPod},
            filterName:   "csi",
            maxVols:      4,
            driverNames:  []string{ebsCSIDriverName},
            fits:         true,
            test:         "fits when node volume limit >= new pods CSI volume",
            limitSource:  "node",
        },
        {
            newPod:       csiEBSOneVolPod,
            existingPods: []*v1.Pod{runningPod, csiEBSTwoVolPod},
            filterName:   "csi",
            maxVols:      2,
            driverNames:  []string{ebsCSIDriverName},
            fits:         false,
            test:         "doesn't when node volume limit <= pods CSI volume",
            limitSource:  "node",
        },
        {
            newPod:       csiEBSOneVolPod,
            existingPods: []*v1.Pod{runningPod, csiEBSTwoVolPod},
            filterName:   "csi",
            maxVols:      2,
            driverNames:  []string{ebsCSIDriverName},
            fits:         true,
            test:         "should when driver does not support volume limits",
            limitSource:  "csinode-with-no-limit",
        },
        // should count pending PVCs
        {
            newPod:       csiEBSOneVolPod,
            existingPods: []*v1.Pod{pendingVolumePod, csiEBSTwoVolPod},
            filterName:   "csi",
            maxVols:      2,
            driverNames:  []string{ebsCSIDriverName},
            fits:         false,
            test:         "count pending PVCs towards volume limit <= pods CSI volume",
            limitSource:  "node",
        },
        // two same pending PVCs should be counted as 1
        {
            newPod:       csiEBSOneVolPod,
            existingPods: []*v1.Pod{pendingVolumePod, unboundPVCPod2, csiEBSTwoVolPod},
            filterName:   "csi",
            maxVols:      4,
            driverNames:  []string{ebsCSIDriverName},
            fits:         true,
            test:         "count multiple pending pvcs towards volume limit >= pods CSI volume",
            limitSource:  "node",
        },
        // should count PVCs with invalid PV name but valid SC
        {
            newPod:       csiEBSOneVolPod,
            existingPods: []*v1.Pod{missingPVPod, csiEBSTwoVolPod},
            filterName:   "csi",
            maxVols:      2,
            driverNames:  []string{ebsCSIDriverName},
            fits:         false,
            test:         "should count PVCs with invalid PV name but valid SC",
            limitSource:  "node",
        },
        // don't count a volume which has storageclass missing
        {
            newPod:       csiEBSOneVolPod,
            existingPods: []*v1.Pod{runningPod, noSCPVCPod},
            filterName:   "csi",
            maxVols:      2,
            driverNames:  []string{ebsCSIDriverName},
            fits:         true,
            test:         "don't count pvcs with missing SC towards volume limit",
            limitSource:  "node",
        },
        // don't count multiple volume types
        {
            newPod:       csiEBSOneVolPod,
            existingPods: []*v1.Pod{gceTwoVolPod, csiEBSTwoVolPod},
            filterName:   "csi",
            maxVols:      2,
            driverNames:  []string{ebsCSIDriverName, gceCSIDriverName},
            fits:         false,
            test:         "count pvcs with the same type towards volume limit",
            limitSource:  "node",
        },
        {
            newPod:       gceTwoVolPod,
            existingPods: []*v1.Pod{csiEBSTwoVolPod, runningPod},
            filterName:   "csi",
            maxVols:      2,
            driverNames:  []string{ebsCSIDriverName, gceCSIDriverName},
            fits:         true,
            test:         "don't count pvcs with different type towards volume limit",
            limitSource:  "node",
        },
        // Tests for in-tree volume migration
        {
            newPod:           inTreeOneVolPod,
            existingPods:     []*v1.Pod{inTreeTwoVolPod},
            filterName:       "csi",
            maxVols:          2,
            driverNames:      []string{csilibplugins.AWSEBSInTreePluginName, ebsCSIDriverName},
            fits:             false,
            migrationEnabled: true,
            limitSource:      "csinode",
            test:             "should count in-tree volumes if migration is enabled",
        },
        {
            newPod:           pendingVolumePod,
            existingPods:     []*v1.Pod{inTreeTwoVolPod},
            filterName:       "csi",
            maxVols:          2,
            driverNames:      []string{csilibplugins.AWSEBSInTreePluginName, ebsCSIDriverName},
            fits:             false,
            migrationEnabled: true,
            limitSource:      "csinode",
            test:             "should count unbound in-tree volumes if migration is enabled",
        },
        {
            newPod:           inTreeOneVolPod,
            existingPods:     []*v1.Pod{inTreeTwoVolPod},
            filterName:       "csi",
            maxVols:          2,
            driverNames:      []string{csilibplugins.AWSEBSInTreePluginName, ebsCSIDriverName},
            fits:             true,
            migrationEnabled: false,
            limitSource:      "csinode",
            test:             "should not count in-tree volume if migration is disabled",
        },
        {
            newPod:           inTreeOneVolPod,
            existingPods:     []*v1.Pod{inTreeTwoVolPod},
            filterName:       "csi",
            maxVols:          2,
            driverNames:      []string{csilibplugins.AWSEBSInTreePluginName, ebsCSIDriverName},
            fits:             true,
            migrationEnabled: true,
            limitSource:      "csinode-with-no-limit",
            test:             "should not limit pod if volume used does not report limits",
        },
        {
            newPod:           inTreeOneVolPod,
            existingPods:     []*v1.Pod{inTreeTwoVolPod},
            filterName:       "csi",
            maxVols:          2,
            driverNames:      []string{csilibplugins.AWSEBSInTreePluginName, ebsCSIDriverName},
            fits:             true,
            migrationEnabled: false,
            limitSource:      "csinode-with-no-limit",
            test:             "should not limit in-tree pod if migration is disabled",
        },
        {
            newPod:           inTreeNonMigratableOneVolPod,
            existingPods:     []*v1.Pod{csiEBSTwoVolPod},
            filterName:       "csi",
            maxVols:          2,
            driverNames:      []string{hostpathInTreePluginName, ebsCSIDriverName},
            fits:             true,
            migrationEnabled: true,
            limitSource:      "csinode",
            test:             "should not count non-migratable in-tree volumes",
        },
        // mixed volumes
        {
            newPod:           inTreeOneVolPod,
            existingPods:     []*v1.Pod{csiEBSTwoVolPod},
            filterName:       "csi",
            maxVols:          2,
            driverNames:      []string{csilibplugins.AWSEBSInTreePluginName, ebsCSIDriverName},
            fits:             false,
            migrationEnabled: true,
            limitSource:      "csinode",
            test:             "should count in-tree and csi volumes if migration is enabled (when scheduling in-tree volumes)",
        },
        {
            newPod:           csiEBSOneVolPod,
            existingPods:     []*v1.Pod{inTreeTwoVolPod},
            filterName:       "csi",
            maxVols:          2,
            driverNames:      []string{csilibplugins.AWSEBSInTreePluginName, ebsCSIDriverName},
            fits:             false,
            migrationEnabled: true,
            limitSource:      "csinode",
            test:             "should count in-tree and csi volumes if migration is enabled (when scheduling csi volumes)",
        },
        {
            newPod:           csiEBSOneVolPod,
            existingPods:     []*v1.Pod{csiEBSTwoVolPod, inTreeTwoVolPod},
            filterName:       "csi",
            maxVols:          3,
            driverNames:      []string{csilibplugins.AWSEBSInTreePluginName, ebsCSIDriverName},
            fits:             true,
            migrationEnabled: false,
            limitSource:      "csinode",
            test:             "should not count in-tree and count csi volumes if migration is disabled (when scheduling csi volumes)",
        },
        {
            newPod:           inTreeOneVolPod,
            existingPods:     []*v1.Pod{csiEBSTwoVolPod},
            filterName:       "csi",
            maxVols:          2,
            driverNames:      []string{csilibplugins.AWSEBSInTreePluginName, ebsCSIDriverName},
            fits:             true,
            migrationEnabled: false,
            limitSource:      "csinode",
            test:             "should not count in-tree and count csi volumes if migration is disabled (when scheduling in-tree volumes)",
        },
    }

    // running attachable predicate tests with feature gate and limit present on nodes
    for _, test := range tests {
        t.Run(test.test, func(t *testing.T) {
            node, csiNode := getNodeWithPodAndVolumeLimits(test.limitSource, test.existingPods, int64(test.maxVols), test.driverNames...)
            if test.migrationEnabled {
                defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.CSIMigration, true)()
                defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.CSIMigrationAWS, true)()
                enableMigrationOnNode(csiNode, csilibplugins.AWSEBSInTreePluginName)
            } else {
                defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.CSIMigration, false)()
                defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.CSIMigrationAWS, false)()
            }

            expectedFailureReasons := []PredicateFailureReason{ErrMaxVolumeCountExceeded}
            if test.expectedFailureReason != nil {
                expectedFailureReasons = []PredicateFailureReason{test.expectedFailureReason}
            }

            pred := NewCSIMaxVolumeLimitPredicate(getFakeCSINodeLister(csiNode),
                getFakeCSIPVLister(test.filterName, test.driverNames...),
                getFakeCSIPVCLister(test.filterName, "csi-sc", test.driverNames...),
                getFakeCSIStorageClassLister("csi-sc", test.driverNames[0]))

            fits, reasons, err := pred(test.newPod, nil, node)
            if err != nil {
                t.Errorf("Using allocatable [%s]%s: unexpected error: %v", test.filterName, test.test, err)
            }
            if !fits && !reflect.DeepEqual(expectedFailureReasons, reasons) {
                t.Errorf("Using allocatable [%s]%s: unexpected failure reasons: %v, want: %v", test.filterName, test.test, reasons, expectedFailureReasons)
            }
            if fits != test.fits {
                t.Errorf("Using allocatable [%s]%s: expected %v, got %v", test.filterName, test.test, test.fits, fits)
            }
        })
    }
}

func getFakeCSIPVLister(volumeName string, driverNames ...string) fakelisters.PersistentVolumeLister {
    pvLister := fakelisters.PersistentVolumeLister{}
    for _, driver := range driverNames {
        for j := 0; j < 4; j++ {
            volumeHandle := fmt.Sprintf("%s-%s-%d", volumeName, driver, j)
            pv := v1.PersistentVolume{
                ObjectMeta: metav1.ObjectMeta{Name: volumeHandle},
                Spec: v1.PersistentVolumeSpec{
                    PersistentVolumeSource: v1.PersistentVolumeSource{
                        CSI: &v1.CSIPersistentVolumeSource{
                            Driver:       driver,
                            VolumeHandle: volumeHandle,
                        },
                    },
                },
            }

            switch driver {
            case csilibplugins.AWSEBSInTreePluginName:
                pv.Spec.PersistentVolumeSource = v1.PersistentVolumeSource{
                    AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{
                        VolumeID: volumeHandle,
                    },
                }
            case hostpathInTreePluginName:
                pv.Spec.PersistentVolumeSource = v1.PersistentVolumeSource{
                    HostPath: &v1.HostPathVolumeSource{
                        Path: "/tmp",
                    },
                }
            default:
                pv.Spec.PersistentVolumeSource = v1.PersistentVolumeSource{
                    CSI: &v1.CSIPersistentVolumeSource{
                        Driver:       driver,
                        VolumeHandle: volumeHandle,
                    },
                }
            }
            pvLister = append(pvLister, pv)
        }

    }
    return pvLister
}

func getFakeCSIPVCLister(volumeName, scName string, driverNames ...string) fakelisters.PersistentVolumeClaimLister {
    pvcLister := fakelisters.PersistentVolumeClaimLister{}
    for _, driver := range driverNames {
        for j := 0; j < 4; j++ {
            v := fmt.Sprintf("%s-%s-%d", volumeName, driver, j)
            pvc := v1.PersistentVolumeClaim{
                ObjectMeta: metav1.ObjectMeta{Name: v},
                Spec:       v1.PersistentVolumeClaimSpec{VolumeName: v},
            }
            pvcLister = append(pvcLister, pvc)
        }
    }

    pvcLister = append(pvcLister, v1.PersistentVolumeClaim{
        ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-4"},
        Spec:       v1.PersistentVolumeClaimSpec{StorageClassName: &scName},
    })
    pvcLister = append(pvcLister, v1.PersistentVolumeClaim{
        ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-5"},
        Spec:       v1.PersistentVolumeClaimSpec{},
    })
    // a pvc with missing PV but available storageclass.
    pvcLister = append(pvcLister, v1.PersistentVolumeClaim{
        ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-6"},
        Spec:       v1.PersistentVolumeClaimSpec{StorageClassName: &scName, VolumeName: "missing-in-action"},
    })
    return pvcLister
}

func enableMigrationOnNode(csiNode *storagev1.CSINode, pluginName string) {
    nodeInfoAnnotations := csiNode.GetAnnotations()
    if nodeInfoAnnotations == nil {
        nodeInfoAnnotations = map[string]string{}
    }

    newAnnotationSet := sets.NewString()
    newAnnotationSet.Insert(pluginName)
    nas := strings.Join(newAnnotationSet.List(), ",")
    nodeInfoAnnotations[v1.MigratedPluginsAnnotationKey] = nas

    csiNode.Annotations = nodeInfoAnnotations
}

func getFakeCSIStorageClassLister(scName, provisionerName string) fakelisters.StorageClassLister {
    return fakelisters.StorageClassLister{
        {
            ObjectMeta:  metav1.ObjectMeta{Name: scName},
            Provisioner: provisionerName,
        },
    }
}

func getFakeCSINodeLister(csiNode *storagev1.CSINode) fakelisters.CSINodeLister {
    if csiNode != nil {
        return fakelisters.CSINodeLister(*csiNode)
    }
    return fakelisters.CSINodeLister{}
}
@@ -1134,3 +1134,10 @@ func getNodeWithPodAndVolumeLimits(limitSource string, pods []*v1.Pod, limit int
    nodeInfo.SetNode(node)
    return nodeInfo, csiNode
}

func getFakeCSINodeLister(csiNode *storagev1.CSINode) fakelisters.CSINodeLister {
    if csiNode != nil {
        return fakelisters.CSINodeLister(*csiNode)
    }
    return fakelisters.CSINodeLister{}
}
@@ -9,20 +9,30 @@ go_library(
        "csinode_helper.go",
        "ebs.go",
        "gce.go",
        "utils.go",
    ],
    importpath = "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodevolumelimits",
    visibility = ["//visibility:public"],
    deps = [
        "//pkg/apis/core/v1/helper:go_default_library",
        "//pkg/features:go_default_library",
        "//pkg/scheduler/algorithm/predicates:go_default_library",
        "//pkg/scheduler/framework/plugins/migration:go_default_library",
        "//pkg/scheduler/framework/v1alpha1:go_default_library",
        "//pkg/scheduler/nodeinfo:go_default_library",
        "//pkg/volume/util:go_default_library",
        "//staging/src/k8s.io/api/core/v1:go_default_library",
        "//staging/src/k8s.io/api/storage/v1:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/util/rand:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
        "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
        "//staging/src/k8s.io/client-go/informers:go_default_library",
        "//staging/src/k8s.io/client-go/listers/core/v1:go_default_library",
        "//staging/src/k8s.io/client-go/listers/storage/v1:go_default_library",
        "//staging/src/k8s.io/csi-translation-lib:go_default_library",
        "//staging/src/k8s.io/csi-translation-lib/plugins:go_default_library",
        "//vendor/k8s.io/klog:go_default_library",
    ],
)

@@ -47,9 +57,11 @@ go_test(
        "//staging/src/k8s.io/api/storage/v1:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/util/rand:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
        "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
        "//staging/src/k8s.io/component-base/featuregate/testing:go_default_library",
        "//staging/src/k8s.io/csi-translation-lib:go_default_library",
        "//staging/src/k8s.io/csi-translation-lib/plugins:go_default_library",
        "//vendor/k8s.io/utils/pointer:go_default_library",
    ],
@@ -18,18 +18,45 @@ package nodevolumelimits

import (
    "context"
    "fmt"

    v1 "k8s.io/api/core/v1"
    storagev1 "k8s.io/api/storage/v1"
    "k8s.io/apimachinery/pkg/runtime"
    "k8s.io/apimachinery/pkg/util/rand"
    corelisters "k8s.io/client-go/listers/core/v1"
    storagelisters "k8s.io/client-go/listers/storage/v1"
    csitrans "k8s.io/csi-translation-lib"
    v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
    "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
    "k8s.io/kubernetes/pkg/scheduler/framework/plugins/migration"
    framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
    "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
    schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
    volumeutil "k8s.io/kubernetes/pkg/volume/util"

    "k8s.io/klog"
)

// InTreeToCSITranslator contains the methods required to check migratable status
// and perform translations from in-tree PVs to CSI.
type InTreeToCSITranslator interface {
    IsPVMigratable(pv *v1.PersistentVolume) bool
    IsMigratableIntreePluginByName(inTreePluginName string) bool
    GetInTreePluginNameFromSpec(pv *v1.PersistentVolume, vol *v1.Volume) (string, error)
    GetCSINameFromInTreeName(pluginName string) (string, error)
    TranslateInTreePVToCSI(pv *v1.PersistentVolume) (*v1.PersistentVolume, error)
}

// CSILimits is a plugin that checks node volume limits.
type CSILimits struct {
    predicate     predicates.FitPredicate
    csiNodeLister storagelisters.CSINodeLister
    pvLister      corelisters.PersistentVolumeLister
    pvcLister     corelisters.PersistentVolumeClaimLister
    scLister      storagelisters.StorageClassLister

    randomVolumeIDPrefix string

    translator InTreeToCSITranslator
}

var _ framework.FilterPlugin = &CSILimits{}
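The translator field above is typed against the plugin's own narrow InTreeToCSITranslator interface, and the NewCSI constructor further down fills it with csitrans.New(). A minimal sketch of pinning that contract down at compile time; the assertion line is illustrative only and not part of this commit, which relies on the implicit conversion in the constructor instead:

    // Hypothetical compile-time check: csitrans.New() must satisfy the
    // narrow interface that CSILimits depends on.
    var _ InTreeToCSITranslator = csitrans.New()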
@@ -44,9 +71,203 @@ func (pl *CSILimits) Name() string {

// Filter invoked at the filter extension point.
func (pl *CSILimits) Filter(ctx context.Context, _ *framework.CycleState, pod *v1.Pod, nodeInfo *nodeinfo.NodeInfo) *framework.Status {
    // metadata is not needed
    _, reasons, err := pl.predicate(pod, nil, nodeInfo)
    return migration.PredicateResultToFrameworkStatus(reasons, err)
    // If the new pod doesn't have any volumes attached to it, the predicate will always be true.
    if len(pod.Spec.Volumes) == 0 {
        return nil
    }

    node := nodeInfo.Node()
    if node == nil {
        return framework.NewStatus(framework.Error, fmt.Sprintf("node not found"))
    }

    // If CSINode doesn't exist, the predicate may read the limits from the Node object.
    csiNode, err := pl.csiNodeLister.Get(node.Name)
    if err != nil {
        // TODO: return the error once CSINode is created by default (2 releases)
        klog.V(5).Infof("Could not get a CSINode object for the node: %v", err)
    }

    newVolumes := make(map[string]string)
    if err := pl.filterAttachableVolumes(csiNode, pod.Spec.Volumes, pod.Namespace, newVolumes); err != nil {
        return framework.NewStatus(framework.Error, err.Error())
    }

    // If the pod doesn't have any new CSI volumes, the predicate will always be true.
    if len(newVolumes) == 0 {
        return nil
    }

    // If the node doesn't have volume limits, the predicate will always be true.
    nodeVolumeLimits := getVolumeLimits(nodeInfo, csiNode)
    if len(nodeVolumeLimits) == 0 {
        return nil
    }

    attachedVolumes := make(map[string]string)
    for _, existingPod := range nodeInfo.Pods() {
        if err := pl.filterAttachableVolumes(csiNode, existingPod.Spec.Volumes, existingPod.Namespace, attachedVolumes); err != nil {
            return framework.NewStatus(framework.Error, err.Error())
        }
    }

    attachedVolumeCount := map[string]int{}
    for volumeUniqueName, volumeLimitKey := range attachedVolumes {
        if _, ok := newVolumes[volumeUniqueName]; ok {
            // Don't count a single volume used in multiple pods more than once.
            delete(newVolumes, volumeUniqueName)
        }
        attachedVolumeCount[volumeLimitKey]++
    }

    newVolumeCount := map[string]int{}
    for _, volumeLimitKey := range newVolumes {
        newVolumeCount[volumeLimitKey]++
    }

    for volumeLimitKey, count := range newVolumeCount {
        maxVolumeLimit, ok := nodeVolumeLimits[v1.ResourceName(volumeLimitKey)]
        if ok {
            currentVolumeCount := attachedVolumeCount[volumeLimitKey]
            if currentVolumeCount+count > int(maxVolumeLimit) {
                return framework.NewStatus(framework.Unschedulable, predicates.ErrMaxVolumeCountExceeded.GetReason())
            }
        }
    }

    return nil
}
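The body of Filter reduces to map arithmetic: each volume is deduplicated under a driver/handle key, counted per limit key, and the sum of attached plus new volumes is compared against the node's limit. A self-contained sketch of just that arithmetic, with the driver name, volume handles, and the limit value invented for illustration (the limit-key shape mirrors what volumeutil.GetCSIAttachLimitKey produces for short driver names):

    package main

    import "fmt"

    func main() {
        limits := map[string]int64{"attachable-volumes-csi-ebs.csi.aws.com": 3}

        // Volumes already attached on the node: uniqueName -> limitKey.
        attached := map[string]string{
            "ebs.csi.aws.com/vol-1": "attachable-volumes-csi-ebs.csi.aws.com",
            "ebs.csi.aws.com/vol-2": "attachable-volumes-csi-ebs.csi.aws.com",
        }
        // Volumes the incoming pod needs; vol-2 is shared with a running pod.
        newVols := map[string]string{
            "ebs.csi.aws.com/vol-2": "attachable-volumes-csi-ebs.csi.aws.com",
            "ebs.csi.aws.com/vol-3": "attachable-volumes-csi-ebs.csi.aws.com",
        }

        attachedCount := map[string]int{}
        for uniqueName, limitKey := range attached {
            // A volume used by both a running pod and the new pod counts once.
            delete(newVols, uniqueName)
            attachedCount[limitKey]++
        }
        newCount := map[string]int{}
        for _, limitKey := range newVols {
            newCount[limitKey]++
        }
        for limitKey, count := range newCount {
            if limit, ok := limits[limitKey]; ok && attachedCount[limitKey]+count > int(limit) {
                fmt.Println("unschedulable:", limitKey)
                return
            }
        }
        fmt.Println("fits") // 2 attached + 1 new <= 3
    }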

func (pl *CSILimits) filterAttachableVolumes(
    csiNode *storagev1.CSINode, volumes []v1.Volume, namespace string, result map[string]string) error {
    for _, vol := range volumes {
        // CSI volumes can only be used as persistent volumes.
        if vol.PersistentVolumeClaim == nil {
            continue
        }
        pvcName := vol.PersistentVolumeClaim.ClaimName

        if pvcName == "" {
            return fmt.Errorf("PersistentVolumeClaim had no name")
        }

        pvc, err := pl.pvcLister.PersistentVolumeClaims(namespace).Get(pvcName)

        if err != nil {
            klog.V(5).Infof("Unable to look up PVC info for %s/%s", namespace, pvcName)
            continue
        }

        driverName, volumeHandle := pl.getCSIDriverInfo(csiNode, pvc)
        if driverName == "" || volumeHandle == "" {
            klog.V(5).Infof("Could not find a CSI driver name or volume handle, not counting volume")
            continue
        }

        volumeUniqueName := fmt.Sprintf("%s/%s", driverName, volumeHandle)
        volumeLimitKey := volumeutil.GetCSIAttachLimitKey(driverName)
        result[volumeUniqueName] = volumeLimitKey
    }
    return nil
}

// getCSIDriverInfo returns the CSI driver name and volume ID of a given PVC.
// If the PVC is from a migrated in-tree plugin, this function will return
// the information of the CSI driver that the plugin has been migrated to.
func (pl *CSILimits) getCSIDriverInfo(csiNode *storagev1.CSINode, pvc *v1.PersistentVolumeClaim) (string, string) {
    pvName := pvc.Spec.VolumeName
    namespace := pvc.Namespace
    pvcName := pvc.Name

    if pvName == "" {
        klog.V(5).Infof("Persistent volume had no name for claim %s/%s", namespace, pvcName)
        return pl.getCSIDriverInfoFromSC(csiNode, pvc)
    }

    pv, err := pl.pvLister.Get(pvName)
    if err != nil {
        klog.V(5).Infof("Unable to look up PV info for PVC %s/%s and PV %s", namespace, pvcName, pvName)
        // If we can't fetch the PV associated with the PVC, maybe it got deleted
        // or the PVC was prebound to a PV that hasn't been created yet.
        // Fall back to using the StorageClass for volume counting.
        return pl.getCSIDriverInfoFromSC(csiNode, pvc)
    }

    csiSource := pv.Spec.PersistentVolumeSource.CSI
    if csiSource == nil {
        // We make a fast path for non-CSI volumes that aren't migratable.
        if !pl.translator.IsPVMigratable(pv) {
            return "", ""
        }

        pluginName, err := pl.translator.GetInTreePluginNameFromSpec(pv, nil)
        if err != nil {
            klog.V(5).Infof("Unable to look up plugin name from PV spec: %v", err)
            return "", ""
        }

        if !isCSIMigrationOn(csiNode, pluginName) {
            klog.V(5).Infof("CSI Migration of plugin %s is not enabled", pluginName)
            return "", ""
        }

        csiPV, err := pl.translator.TranslateInTreePVToCSI(pv)
        if err != nil {
            klog.V(5).Infof("Unable to translate in-tree volume to CSI: %v", err)
            return "", ""
        }

        if csiPV.Spec.PersistentVolumeSource.CSI == nil {
            klog.V(5).Infof("Unable to get a valid volume source for translated PV %s", pvName)
            return "", ""
        }

        csiSource = csiPV.Spec.PersistentVolumeSource.CSI
    }

    return csiSource.Driver, csiSource.VolumeHandle
}

// getCSIDriverInfoFromSC returns the CSI driver name and a random volume ID of a given PVC's StorageClass.
func (pl *CSILimits) getCSIDriverInfoFromSC(csiNode *storagev1.CSINode, pvc *v1.PersistentVolumeClaim) (string, string) {
    namespace := pvc.Namespace
    pvcName := pvc.Name
    scName := v1helper.GetPersistentVolumeClaimClass(pvc)

    // If the StorageClass is not set or not found, then the PVC must be using immediate binding mode
    // and hence it must be bound before scheduling. So it is safe not to count it.
    if scName == "" {
        klog.V(5).Infof("PVC %s/%s has no StorageClass", namespace, pvcName)
        return "", ""
    }

    storageClass, err := pl.scLister.Get(scName)
    if err != nil {
        klog.V(5).Infof("Could not get StorageClass for PVC %s/%s: %v", namespace, pvcName, err)
        return "", ""
    }

    // We use a random prefix to avoid conflicts with volume IDs. If the PVC is bound during the execution
    // of the predicate and there is another pod on the same node that uses the same volume, then we will
    // overcount the volume and consider both volumes as different.
    volumeHandle := fmt.Sprintf("%s-%s/%s", pl.randomVolumeIDPrefix, namespace, pvcName)

    provisioner := storageClass.Provisioner
    if pl.translator.IsMigratableIntreePluginByName(provisioner) {
        if !isCSIMigrationOn(csiNode, provisioner) {
            klog.V(5).Infof("CSI Migration of plugin %s is not enabled", provisioner)
            return "", ""
        }

        driverName, err := pl.translator.GetCSINameFromInTreeName(provisioner)
        if err != nil {
            klog.V(5).Infof("Unable to look up driver name from plugin name: %v", err)
            return "", ""
        }
        return driverName, volumeHandle
    }

    return provisioner, volumeHandle
}

// NewCSI initializes a new plugin and returns it.
@@ -57,6 +278,27 @@ func NewCSI(_ *runtime.Unknown, handle framework.FrameworkHandle) (framework.Plu
    scLister := informerFactory.Storage().V1().StorageClasses().Lister()

    return &CSILimits{
        predicate:            predicates.NewCSIMaxVolumeLimitPredicate(getCSINodeListerIfEnabled(informerFactory), pvLister, pvcLister, scLister),
        csiNodeLister:        getCSINodeListerIfEnabled(informerFactory),
        pvLister:             pvLister,
        pvcLister:            pvcLister,
        scLister:             scLister,
        randomVolumeIDPrefix: rand.String(32),
        translator:           csitrans.New(),
    }, nil
}

func getVolumeLimits(nodeInfo *schedulernodeinfo.NodeInfo, csiNode *storagev1.CSINode) map[v1.ResourceName]int64 {
    // TODO: stop getting values from Node object in v1.18
    nodeVolumeLimits := nodeInfo.VolumeLimits()
    if csiNode != nil {
        for i := range csiNode.Spec.Drivers {
            d := csiNode.Spec.Drivers[i]
            if d.Allocatable != nil && d.Allocatable.Count != nil {
                // TODO: drop GetCSIAttachLimitKey once we don't get values from Node object (v1.18)
                k := v1.ResourceName(volumeutil.GetCSIAttachLimitKey(d.Name))
                nodeVolumeLimits[k] = int64(*d.Allocatable.Count)
            }
        }
    }
    return nodeVolumeLimits
}
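getVolumeLimits overlays the per-driver allocatable counts reported in CSINode on top of the legacy limits scraped from the Node object, so the CSINode value wins when both are present. A small sketch of that merge using plain maps; the key names and counts are invented, and the real code goes through v1.ResourceName and volumeutil.GetCSIAttachLimitKey:

    package main

    import "fmt"

    func main() {
        // Legacy limits taken from the Node object (the pre-CSINode path).
        limits := map[string]int64{"attachable-volumes-csi-ebs.csi.aws.com": 39}

        // CSINode driver entries: driver name -> allocatable attach count.
        csiNodeCounts := map[string]int32{"ebs.csi.aws.com": 25}

        for driver, count := range csiNodeCounts {
            // The CSINode-reported count overrides the Node-derived value.
            limits["attachable-volumes-csi-"+driver] = int64(count)
        }
        fmt.Println(limits) // map[attachable-volumes-csi-ebs.csi.aws.com:25]
    }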
@@ -27,9 +27,11 @@ import (
    storagev1 "k8s.io/api/storage/v1"
    "k8s.io/apimachinery/pkg/api/resource"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/util/rand"
    "k8s.io/apimachinery/pkg/util/sets"
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    featuregatetesting "k8s.io/component-base/featuregate/testing"
    csitrans "k8s.io/csi-translation-lib"
    csilibplugins "k8s.io/csi-translation-lib/plugins"
    "k8s.io/kubernetes/pkg/features"
    "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
@@ -460,7 +462,12 @@ func TestCSILimits(t *testing.T) {
    }

    p := &CSILimits{
        predicate:            predicates.NewCSIMaxVolumeLimitPredicate(getFakeCSINodeLister(csiNode), getFakeCSIPVLister(test.filterName, test.driverNames...), getFakeCSIPVCLister(test.filterName, "csi-sc", test.driverNames...), getFakeCSIStorageClassLister("csi-sc", test.driverNames[0])),
        csiNodeLister:        getFakeCSINodeLister(csiNode),
        pvLister:             getFakeCSIPVLister(test.filterName, test.driverNames...),
        pvcLister:            getFakeCSIPVCLister(test.filterName, "csi-sc", test.driverNames...),
        scLister:             getFakeCSIStorageClassLister("csi-sc", test.driverNames[0]),
        randomVolumeIDPrefix: rand.String(32),
        translator:           csitrans.New(),
    }
    gotStatus := p.Filter(context.Background(), nil, test.newPod, node)
    if !reflect.DeepEqual(gotStatus, test.wantStatus) {
pkg/scheduler/framework/plugins/nodevolumelimits/utils.go (new file, 81 lines)
@@ -0,0 +1,81 @@
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package nodevolumelimits

import (
    "strings"

    v1 "k8s.io/api/core/v1"
    storagev1 "k8s.io/api/storage/v1"
    "k8s.io/apimachinery/pkg/util/sets"
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    csilibplugins "k8s.io/csi-translation-lib/plugins"
    "k8s.io/kubernetes/pkg/features"
)

// isCSIMigrationOn returns a boolean value indicating whether
// CSI migration has been enabled for a particular storage plugin.
func isCSIMigrationOn(csiNode *storagev1.CSINode, pluginName string) bool {
    if csiNode == nil || len(pluginName) == 0 {
        return false
    }

    // The in-tree storage to CSI driver migration feature must be enabled,
    // along with the plugin-specific one.
    if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigration) {
        return false
    }

    switch pluginName {
    case csilibplugins.AWSEBSInTreePluginName:
        if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationAWS) {
            return false
        }
    case csilibplugins.GCEPDInTreePluginName:
        if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationGCE) {
            return false
        }
    case csilibplugins.AzureDiskInTreePluginName:
        if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationAzureDisk) {
            return false
        }
    case csilibplugins.CinderInTreePluginName:
        if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationOpenStack) {
            return false
        }
    default:
        return false
    }

    // The plugin name should be listed in the CSINode object annotation.
    // This indicates that the plugin has been migrated to a CSI driver on the node.
    csiNodeAnn := csiNode.GetAnnotations()
    if csiNodeAnn == nil {
        return false
    }

    var mpaSet sets.String
    mpa := csiNodeAnn[v1.MigratedPluginsAnnotationKey]
    if len(mpa) == 0 {
        mpaSet = sets.NewString()
    } else {
        tok := strings.Split(mpa, ",")
        mpaSet = sets.NewString(tok...)
    }

    return mpaSet.Has(pluginName)
}
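Both halves have to agree before isCSIMigrationOn returns true: the relevant feature gates and the CSINode annotation listing the migrated plugin. A hedged sketch of the annotation side, mirroring what enableMigrationOnNode does in the test file above; the node name and plugin list are invented, and metav1 stands for k8s.io/apimachinery/pkg/apis/meta/v1, which this file does not otherwise import:

    csiNode := &storagev1.CSINode{
        ObjectMeta: metav1.ObjectMeta{
            Name: "node-1",
            Annotations: map[string]string{
                // Comma-separated in-tree plugins migrated on this node.
                v1.MigratedPluginsAnnotationKey: "kubernetes.io/aws-ebs,kubernetes.io/gce-pd",
            },
        },
    }
    // With the CSIMigration and CSIMigrationAWS feature gates enabled,
    // this returns true for the AWS EBS in-tree plugin:
    _ = isCSIMigrationOn(csiNode, csilibplugins.AWSEBSInTreePluginName)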