diff --git a/pkg/controller/volume/events/event.go b/pkg/controller/volume/events/event.go
index b5ac3fb7be8..c99c30c99a7 100644
--- a/pkg/controller/volume/events/event.go
+++ b/pkg/controller/volume/events/event.go
@@ -29,4 +29,5 @@ const (
 	ProvisioningFailed        = "ProvisioningFailed"
 	ProvisioningCleanupFailed = "ProvisioningCleanupFailed"
 	ProvisioningSucceeded     = "ProvisioningSucceeded"
+	WaitForFirstConsumer      = "WaitForFirstConsumer"
 )
diff --git a/pkg/controller/volume/persistentvolume/binder_test.go b/pkg/controller/volume/persistentvolume/binder_test.go
index 8288cd0ca82..a34b595747f 100644
--- a/pkg/controller/volume/persistentvolume/binder_test.go
+++ b/pkg/controller/volume/persistentvolume/binder_test.go
@@ -21,6 +21,7 @@ import (
 	"k8s.io/api/core/v1"
 	storage "k8s.io/api/storage/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
 )
 
@@ -178,6 +179,25 @@ func TestSync(t *testing.T) {
 			[]string{"Normal FailedBinding"},
 			noerrors, testSyncClaim,
 		},
+		{
+			// syncClaim does not do anything when binding is delayed
+			"1-13 - delayed binding",
+			newVolumeArray("volume1-1", "1Gi", "", "", v1.VolumePending, v1.PersistentVolumeReclaimRetain, classWait),
+			newVolumeArray("volume1-1", "1Gi", "", "", v1.VolumePending, v1.PersistentVolumeReclaimRetain, classWait),
+			newClaimArray("claim1-1", "uid1-1", "1Gi", "", v1.ClaimPending, &classWait),
+			newClaimArray("claim1-1", "uid1-1", "1Gi", "", v1.ClaimPending, &classWait),
+			[]string{"Normal WaitForFirstConsumer"},
+			noerrors, testSyncClaim,
+		},
+		{
+			// syncClaim binds when binding is delayed but PV is prebound to PVC
+			"1-14 - successful prebound PV",
+			newVolumeArray("volume1-1", "1Gi", "", "claim1-1", v1.VolumePending, v1.PersistentVolumeReclaimRetain, classWait),
+			newVolumeArray("volume1-1", "1Gi", "uid1-1", "claim1-1", v1.VolumeBound, v1.PersistentVolumeReclaimRetain, classWait),
+			newClaimArray("claim1-1", "uid1-1", "1Gi", "", v1.ClaimPending, &classWait),
+			newClaimArray("claim1-1", "uid1-1", "1Gi", "volume1-1", v1.ClaimBound, &classWait, annBoundByController, annBindCompleted),
+			noevents, noerrors, testSyncClaim,
+		},
 		// [Unit test set 2] User asked for a specific PV.
 		// Test the binding when pv.ClaimRef is already set by controller or
@@ -570,7 +590,15 @@ func TestSync(t *testing.T) {
 		},
 	}
 
-	runSyncTests(t, tests, []*storage.StorageClass{})
+	utilfeature.DefaultFeatureGate.Set("VolumeScheduling=true")
+	defer utilfeature.DefaultFeatureGate.Set("VolumeScheduling=false")
+
+	runSyncTests(t, tests, []*storage.StorageClass{
+		{
+			ObjectMeta:        metav1.ObjectMeta{Name: classWait},
+			VolumeBindingMode: &modeWait,
+		},
+	})
 }
 
 func TestSyncAlphaBlockVolume(t *testing.T) {
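A note on the gate handling above: DefaultFeatureGate.Set returns an error that TestSync discards, which matches the existing style in this package. A stricter variant would fail fast — a minimal sketch, assuming the same utilfeature import and the testing.T already in scope:

	if err := utilfeature.DefaultFeatureGate.Set("VolumeScheduling=true"); err != nil {
		t.Fatalf("Failed to enable VolumeScheduling feature gate: %v", err)
	}
	defer utilfeature.DefaultFeatureGate.Set("VolumeScheduling=false")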
diff --git a/pkg/controller/volume/persistentvolume/framework_test.go b/pkg/controller/volume/persistentvolume/framework_test.go
index d826947191d..1e347ae9ffb 100644
--- a/pkg/controller/volume/persistentvolume/framework_test.go
+++ b/pkg/controller/volume/persistentvolume/framework_test.go
@@ -822,6 +822,9 @@ var (
 	classUnknownInternal         string = "unknown-internal"
 	classUnsupportedMountOptions string = "unsupported-mountoptions"
 	classLarge                   string = "large"
+	classWait                    string = "wait"
+
+	modeWait = storage.VolumeBindingWaitForFirstConsumer
 )
 
 // wrapTestWithPluginCalls returns a testCall that:
diff --git a/pkg/controller/volume/persistentvolume/index.go b/pkg/controller/volume/persistentvolume/index.go
index cf6adca4ec3..dd652471d7a 100644
--- a/pkg/controller/volume/persistentvolume/index.go
+++ b/pkg/controller/volume/persistentvolume/index.go
@@ -29,6 +29,7 @@ import (
 	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
 	"k8s.io/kubernetes/pkg/features"
 	"k8s.io/kubernetes/pkg/volume"
+	volumeutil "k8s.io/kubernetes/pkg/volume/util"
 )
 
 // persistentVolumeOrderedIndex is a cache.Store that keeps persistent volumes
@@ -74,7 +75,7 @@ func (pvIndex *persistentVolumeOrderedIndex) listByAccessModes(modes []v1.Persis
 }
 
 // find returns the nearest PV from the ordered list or nil if a match is not found
-func (pvIndex *persistentVolumeOrderedIndex) findByClaim(claim *v1.PersistentVolumeClaim) (*v1.PersistentVolume, error) {
+func (pvIndex *persistentVolumeOrderedIndex) findByClaim(claim *v1.PersistentVolumeClaim, delayBinding bool) (*v1.PersistentVolume, error) {
 	// PVs are indexed by their access modes to allow easier searching. Each
 	// index is the string representation of a set of access modes. There is a
 	// finite number of possible sets and PVs will only be indexed in one of
@@ -96,7 +97,7 @@ func (pvIndex *persistentVolumeOrderedIndex) findByClaim(claim *v1.PersistentVol
 			return nil, err
 		}
 
-		bestVol, err := findMatchingVolume(claim, volumes)
+		bestVol, err := findMatchingVolume(claim, volumes, nil /* node for topology binding */, nil /* exclusion map */, delayBinding)
 		if err != nil {
 			return nil, err
 		}
@@ -108,7 +109,27 @@ func (pvIndex *persistentVolumeOrderedIndex) findByClaim(claim *v1.PersistentVol
 	return nil, nil
 }
 
-func findMatchingVolume(claim *v1.PersistentVolumeClaim, volumes []*v1.PersistentVolume) (*v1.PersistentVolume, error) {
+// findMatchingVolume goes through the list of volumes to find the best matching volume
+// for the claim.
+//
+// This function is used by both the PV controller and the scheduler.
+//
+// delayBinding is true only in the PV controller path. When set, prebound PVs are still
+// returned as a match for the claim, but unbound PVs are skipped.
+//
+// node is set only in the scheduler path. When set, the PV node affinity is checked against
+// the node's labels.
+//
+// excludedVolumes is only used in the scheduler path, and is needed for evaluating multiple
+// unbound PVCs for a single Pod at one time. As each PVC finds a matching PV, the chosen
+// PV needs to be excluded from future matching.
+func findMatchingVolume(
+	claim *v1.PersistentVolumeClaim,
+	volumes []*v1.PersistentVolume,
+	node *v1.Node,
+	excludedVolumes map[string]*v1.PersistentVolume,
+	delayBinding bool) (*v1.PersistentVolume, error) {
+
 	var smallestVolume *v1.PersistentVolume
 	var smallestVolumeQty resource.Quantity
 	requestedQty := claim.Spec.Resources.Requests[v1.ResourceName(v1.ResourceStorage)]
@@ -131,6 +152,11 @@ func findMatchingVolume(claim *v1.PersistentVolumeClaim, volumes []*v1.Persisten
 	// - find the smallest matching one if there is no volume pre-bound to
 	//   the claim.
 	for _, volume := range volumes {
+		if _, ok := excludedVolumes[volume.Name]; ok {
+			// Skip volumes in the excluded list
+			continue
+		}
+
 		volumeQty := volume.Spec.Capacity[v1.ResourceStorage]
 
 		// check if volumeModes do not match (Alpha and feature gate protected)
@@ -143,6 +169,15 @@ func findMatchingVolume(claim *v1.PersistentVolumeClaim, volumes []*v1.Persisten
 			continue
 		}
 
+		if node != nil {
+			// Scheduler path, check that the PV NodeAffinity
+			// is satisfied by the node
+			err := volumeutil.CheckNodeAffinity(volume, node.Labels)
+			if err != nil {
+				continue
+			}
+		}
+
 		if isVolumeBoundToClaim(volume, claim) {
 			// this claim and volume are pre-bound; return
 			// the volume if the size request is satisfied,
@@ -153,6 +188,13 @@ func findMatchingVolume(claim *v1.PersistentVolumeClaim, volumes []*v1.Persisten
 			return volume, nil
 		}
 
+		if node == nil && delayBinding {
+			// The PV controller does not bind this claim; the scheduler
+			// will handle binding unbound volumes. The scheduler path
+			// calls this function with node != nil.
+			continue
+		}
+
 		// filter out:
 		// - volumes bound to another claim
 		// - volumes whose labels don't match the claim's selector, if specified
@@ -166,6 +208,14 @@ func findMatchingVolume(claim *v1.PersistentVolumeClaim, volumes []*v1.Persisten
 			continue
 		}
 
+		if node != nil {
+			// Scheduler path
+			// Check that the access modes match
+			if !checkAccessModes(claim, volume) {
+				continue
+			}
+		}
+
 		if volumeQty.Cmp(requestedQty) >= 0 {
 			if smallestVolume == nil || smallestVolumeQty.Cmp(volumeQty) > 0 {
 				smallestVolume = volume
@@ -204,8 +254,8 @@ func checkVolumeModeMisMatches(pvcSpec *v1.PersistentVolumeClaimSpec, pvSpec *v1
 }
 
 // findBestMatchForClaim is a convenience method that finds a volume by the claim's AccessModes and requests for Storage
-func (pvIndex *persistentVolumeOrderedIndex) findBestMatchForClaim(claim *v1.PersistentVolumeClaim) (*v1.PersistentVolume, error) {
-	return pvIndex.findByClaim(claim)
+func (pvIndex *persistentVolumeOrderedIndex) findBestMatchForClaim(claim *v1.PersistentVolumeClaim, delayBinding bool) (*v1.PersistentVolume, error) {
+	return pvIndex.findByClaim(claim, delayBinding)
 }
 
 // allPossibleMatchingAccessModes returns an array of AccessMode arrays that
@@ -287,3 +337,19 @@ func claimToClaimKey(claim *v1.PersistentVolumeClaim) string {
 func claimrefToClaimKey(claimref *v1.ObjectReference) string {
 	return fmt.Sprintf("%s/%s", claimref.Namespace, claimref.Name)
 }
+
+// checkAccessModes returns true if the PV satisfies all of the PVC's requested AccessModes
+func checkAccessModes(claim *v1.PersistentVolumeClaim, volume *v1.PersistentVolume) bool {
+	pvModesMap := map[v1.PersistentVolumeAccessMode]bool{}
+	for _, mode := range volume.Spec.AccessModes {
+		pvModesMap[mode] = true
+	}
+
+	for _, mode := range claim.Spec.AccessModes {
+		_, ok := pvModesMap[mode]
+		if !ok {
+			return false
+		}
+	}
+	return true
+}
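The doc comment on findMatchingVolume names two callers, but only the PV controller path (via findByClaim) exists in this patch; the scheduler path is future work. A minimal sketch of how that path is expected to drive the exclusion map when a Pod has several unbound PVCs — matchPodClaims is a hypothetical helper in the same package, with error handling elided:

	// matchPodClaims matches each unbound claim of a Pod against the PV list,
	// excluding PVs already chosen for the Pod's earlier claims.
	func matchPodClaims(claims []*v1.PersistentVolumeClaim, volumes []*v1.PersistentVolume, node *v1.Node) map[string]*v1.PersistentVolume {
		chosen := map[string]*v1.PersistentVolume{}
		for _, claim := range claims {
			pv, err := findMatchingVolume(claim, volumes, node, chosen, true)
			if err != nil || pv == nil {
				continue // this claim cannot be satisfied on this node
			}
			// Reserve the PV so the next claim cannot select it as well.
			chosen[pv.Name] = pv
		}
		return chosen
	}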
diff --git a/pkg/controller/volume/persistentvolume/index_test.go b/pkg/controller/volume/persistentvolume/index_test.go
index a1edc3bb286..b734a67a991 100644
--- a/pkg/controller/volume/persistentvolume/index_test.go
+++ b/pkg/controller/volume/persistentvolume/index_test.go
@@ -20,6 +20,8 @@ import (
 	"sort"
 	"testing"
 
+	"github.com/golang/glog"
+
 	"k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -27,6 +29,7 @@ import (
 	"k8s.io/client-go/kubernetes/scheme"
 	ref "k8s.io/client-go/tools/reference"
 	"k8s.io/kubernetes/pkg/api/testapi"
+	"k8s.io/kubernetes/pkg/apis/core/v1/helper"
 	"k8s.io/kubernetes/pkg/volume"
 )
 
@@ -178,7 +181,7 @@ func TestMatchVolume(t *testing.T) {
 	}
 
 	for name, scenario := range scenarios {
-		volume, err := volList.findBestMatchForClaim(scenario.claim)
+		volume, err := volList.findBestMatchForClaim(scenario.claim, false)
 		if err != nil {
 			t.Errorf("Unexpected error matching volume by claim: %v", err)
 		}
@@ -249,7 +252,7 @@ func TestMatchingWithBoundVolumes(t *testing.T) {
 		},
 	}
 
-	volume, err := volumeIndex.findBestMatchForClaim(claim)
+	volume, err := volumeIndex.findBestMatchForClaim(claim, false)
 	if err != nil {
 		t.Fatalf("Unexpected error matching volume by claim: %v", err)
 	}
@@ -372,27 +375,27 @@ func TestFindingVolumeWithDifferentAccessModes(t *testing.T) {
 	index.store.Add(ebs)
 	index.store.Add(nfs)
 
-	volume, _ := index.findBestMatchForClaim(claim)
+	volume, _ := index.findBestMatchForClaim(claim, false)
 	if volume.Name != ebs.Name {
 		t.Errorf("Expected %s but got volume %s instead", ebs.Name, volume.Name)
 	}
 
 	claim.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce, v1.ReadOnlyMany}
-	volume, _ = index.findBestMatchForClaim(claim)
+	volume, _ = index.findBestMatchForClaim(claim, false)
 	if volume.Name != gce.Name {
 		t.Errorf("Expected %s but got volume %s instead", gce.Name, volume.Name)
 	}
 
 	// order of the requested modes should not matter
 	claim.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteMany, v1.ReadWriteOnce, v1.ReadOnlyMany}
-	volume, _ = index.findBestMatchForClaim(claim)
+	volume, _ = index.findBestMatchForClaim(claim, false)
 	if volume.Name != nfs.Name {
 		t.Errorf("Expected %s but got volume %s instead", nfs.Name, volume.Name)
 	}
 
 	// fewer modes requested should still match
 	claim.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteMany}
-	volume, _ = index.findBestMatchForClaim(claim)
+	volume, _ = index.findBestMatchForClaim(claim, false)
 	if volume.Name != nfs.Name {
 		t.Errorf("Expected %s but got volume %s instead", nfs.Name, volume.Name)
 	}
@@ -400,7 +403,7 @@
 	// pretend the exact match is bound. should get the next level up of modes.
 	ebs.Spec.ClaimRef = &v1.ObjectReference{}
 	claim.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
-	volume, _ = index.findBestMatchForClaim(claim)
+	volume, _ = index.findBestMatchForClaim(claim, false)
 	if volume.Name != gce.Name {
 		t.Errorf("Expected %s but got volume %s instead", gce.Name, volume.Name)
 	}
@@ -408,7 +411,7 @@
 	// continue up the levels of modes.
 	gce.Spec.ClaimRef = &v1.ObjectReference{}
 	claim.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
-	volume, _ = index.findBestMatchForClaim(claim)
+	volume, _ = index.findBestMatchForClaim(claim, false)
 	if volume.Name != nfs.Name {
 		t.Errorf("Expected %s but got volume %s instead", nfs.Name, volume.Name)
 	}
@@ -416,7 +419,7 @@
 	// partial mode request
 	gce.Spec.ClaimRef = nil
 	claim.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadOnlyMany}
-	volume, _ = index.findBestMatchForClaim(claim)
+	volume, _ = index.findBestMatchForClaim(claim, false)
 	if volume.Name != gce.Name {
 		t.Errorf("Expected %s but got volume %s instead", gce.Name, volume.Name)
 	}
@@ -675,6 +678,87 @@ func createTestVolumes() []*v1.PersistentVolume {
 				StorageClassName: classLarge,
 			},
 		},
+		{
+			ObjectMeta: metav1.ObjectMeta{
+				UID:         "affinity-pv",
+				Name:        "affinity001",
+				Annotations: getAnnotationWithNodeAffinity("key1", "value1"),
+			},
+			Spec: v1.PersistentVolumeSpec{
+				Capacity: v1.ResourceList{
+					v1.ResourceName(v1.ResourceStorage): resource.MustParse("100G"),
+				},
+				PersistentVolumeSource: v1.PersistentVolumeSource{
+					Local: &v1.LocalVolumeSource{},
+				},
+				AccessModes: []v1.PersistentVolumeAccessMode{
+					v1.ReadWriteOnce,
+					v1.ReadOnlyMany,
+				},
+				StorageClassName: classWait,
+			},
+		},
+		{
+			ObjectMeta: metav1.ObjectMeta{
+				UID:         "affinity-pv2",
+				Name:        "affinity002",
+				Annotations: getAnnotationWithNodeAffinity("key1", "value1"),
+			},
+			Spec: v1.PersistentVolumeSpec{
+				Capacity: v1.ResourceList{
+					v1.ResourceName(v1.ResourceStorage): resource.MustParse("150G"),
+				},
+				PersistentVolumeSource: v1.PersistentVolumeSource{
+					Local: &v1.LocalVolumeSource{},
+				},
+				AccessModes: []v1.PersistentVolumeAccessMode{
+					v1.ReadWriteOnce,
+					v1.ReadOnlyMany,
+				},
+				StorageClassName: classWait,
+			},
+		},
+		{
+			ObjectMeta: metav1.ObjectMeta{
+				UID:         "affinity-prebound",
+				Name:        "affinity003",
+				Annotations: getAnnotationWithNodeAffinity("key1", "value1"),
+			},
+			Spec: v1.PersistentVolumeSpec{
+				Capacity: v1.ResourceList{
+					v1.ResourceName(v1.ResourceStorage): resource.MustParse("100G"),
+				},
+				PersistentVolumeSource: v1.PersistentVolumeSource{
+					Local: &v1.LocalVolumeSource{},
+				},
+				AccessModes: []v1.PersistentVolumeAccessMode{
+					v1.ReadWriteOnce,
+					v1.ReadOnlyMany,
+				},
+				StorageClassName: classWait,
+				ClaimRef:         &v1.ObjectReference{Name: "claim02", Namespace: "myns"},
+			},
+		},
+		{
+			ObjectMeta: metav1.ObjectMeta{
+				UID:         "affinity-pv3",
+				Name:        "affinity004",
+				Annotations: getAnnotationWithNodeAffinity("key1", "value3"),
+			},
+			Spec: v1.PersistentVolumeSpec{
+				Capacity: v1.ResourceList{
+					v1.ResourceName(v1.ResourceStorage): resource.MustParse("200G"),
+				},
+				PersistentVolumeSource: v1.PersistentVolumeSource{
+					Local: &v1.LocalVolumeSource{},
+				},
+				AccessModes: []v1.PersistentVolumeAccessMode{
+					v1.ReadWriteOnce,
+					v1.ReadOnlyMany,
+				},
+				StorageClassName: classWait,
+			},
+		},
 	}
 }
 
@@ -692,6 +776,32 @@ func testVolume(name, size string) *v1.PersistentVolume {
 	}
 }
 
+func getAnnotationWithNodeAffinity(key string, value string) map[string]string {
+	affinity := &v1.NodeAffinity{
+		RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+			NodeSelectorTerms: []v1.NodeSelectorTerm{
+				{
+					MatchExpressions: []v1.NodeSelectorRequirement{
+						{
+							Key:      key,
+							Operator: v1.NodeSelectorOpIn,
+							Values:   []string{value},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	annotations := map[string]string{}
+	err := helper.StorageNodeAffinityToAlphaAnnotation(annotations, affinity)
+	if err != nil {
+		glog.Fatalf("Failed to get node affinity annotation: %v", err)
+	}
+
+	return annotations
+}
+
 func createVolumeModeBlockTestVolume() *v1.PersistentVolume {
 	blockMode := v1.PersistentVolumeBlock
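getAnnotationWithNodeAffinity stores the affinity on the PV via the alpha annotation helper. To see the round trip in a test, the annotation can be read back with the inverse helper — a sketch assuming GetStorageNodeAffinityFromAnnotation from the same pkg/apis/core/v1/helper package:

	annotations := getAnnotationWithNodeAffinity("key1", "value1")
	affinity, err := helper.GetStorageNodeAffinityFromAnnotation(annotations)
	if err != nil || affinity == nil {
		t.Fatalf("Expected node affinity annotation to round-trip: %v", err)
	}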
@@ -919,7 +1029,7 @@ func TestAlphaFilteringVolumeModes(t *testing.T) {
 	for name, scenario := range scenarios {
 		toggleBlockVolumeFeature(scenario.enableBlock, t)
-		pvmatch, err := scenario.vol.findBestMatchForClaim(scenario.pvc)
+		pvmatch, err := scenario.vol.findBestMatchForClaim(scenario.pvc, false)
 		// expected to match but either got an error or no returned pvmatch
 		if pvmatch == nil && scenario.isExpectedMatch {
 			t.Errorf("Unexpected failure for scenario, no matching volume: %s", name)
@@ -972,14 +1082,14 @@ func TestFindingPreboundVolumes(t *testing.T) {
 	index.store.Add(pvBadMode)
 
 	// expected exact match on size
-	volume, _ := index.findBestMatchForClaim(claim)
+	volume, _ := index.findBestMatchForClaim(claim, false)
 	if volume.Name != pv1.Name {
 		t.Errorf("Expected %s but got volume %s instead", pv1.Name, volume.Name)
 	}
 
 	// pretend the exact match is pre-bound. should get the next size up.
 	pv1.Spec.ClaimRef = &v1.ObjectReference{Name: "foo", Namespace: "bar"}
-	volume, _ = index.findBestMatchForClaim(claim)
+	volume, _ = index.findBestMatchForClaim(claim, false)
 	if volume.Name != pv5.Name {
 		t.Errorf("Expected %s but got volume %s instead", pv5.Name, volume.Name)
 	}
@@ -987,7 +1097,7 @@
 	// pretend the exact match is available but the largest volume is pre-bound to the claim.
 	pv1.Spec.ClaimRef = nil
 	pv8.Spec.ClaimRef = claimRef
-	volume, _ = index.findBestMatchForClaim(claim)
+	volume, _ = index.findBestMatchForClaim(claim, false)
 	if volume.Name != pv8.Name {
 		t.Errorf("Expected %s but got volume %s instead", pv8.Name, volume.Name)
 	}
@@ -995,7 +1105,7 @@
 	// pretend the volume with too small a size is pre-bound to the claim. should get the exact match.
 	pv8.Spec.ClaimRef = nil
 	pvBadSize.Spec.ClaimRef = claimRef
-	volume, _ = index.findBestMatchForClaim(claim)
+	volume, _ = index.findBestMatchForClaim(claim, false)
 	if volume.Name != pv1.Name {
 		t.Errorf("Expected %s but got volume %s instead", pv1.Name, volume.Name)
 	}
@@ -1003,12 +1113,186 @@
 	// pretend the volume without the right access mode is pre-bound to the claim. should get the exact match.
 	pvBadSize.Spec.ClaimRef = nil
 	pvBadMode.Spec.ClaimRef = claimRef
-	volume, _ = index.findBestMatchForClaim(claim)
+	volume, _ = index.findBestMatchForClaim(claim, false)
 	if volume.Name != pv1.Name {
 		t.Errorf("Expected %s but got volume %s instead", pv1.Name, volume.Name)
 	}
 }
 
+func TestBestMatchDelayed(t *testing.T) {
+	volList := newPersistentVolumeOrderedIndex()
+	for _, pv := range createTestVolumes() {
+		volList.store.Add(pv)
+	}
+
+	// binding through PV controller should be delayed
+	claim := makePVC("8G", nil)
+	volume, err := volList.findBestMatchForClaim(claim, true)
+	if err != nil {
+		t.Errorf("Unexpected error matching volume by claim: %v", err)
+	}
+	if volume != nil {
+		t.Errorf("Unexpected match with %q", volume.UID)
+	}
+}
+
+func TestFindMatchVolumeWithNode(t *testing.T) {
+	volumes := createTestVolumes()
+	node1 := &v1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Labels: map[string]string{"key1": "value1"},
+		},
+	}
+	node2 := &v1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Labels: map[string]string{"key1": "value2"},
+		},
+	}
+	node3 := &v1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Labels: map[string]string{"key1": "value3"},
+		},
+	}
+
+	scenarios := map[string]struct {
+		expectedMatch   string
+		claim           *v1.PersistentVolumeClaim
+		node            *v1.Node
+		excludedVolumes map[string]*v1.PersistentVolume
+	}{
+		"success-match": {
+			expectedMatch: "affinity-pv",
+			claim: makePVC("100G", func(pvc *v1.PersistentVolumeClaim) {
+				pvc.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
+				pvc.Spec.StorageClassName = &classWait
+			}),
+			node: node1,
+		},
+		"success-prebound": {
+			expectedMatch: "affinity-prebound",
+			claim: makePVC("100G", func(pvc *v1.PersistentVolumeClaim) {
+				pvc.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
+				pvc.Spec.StorageClassName = &classWait
+				pvc.Name = "claim02"
+			}),
+			node: node1,
+		},
+		"success-exclusion": {
+			expectedMatch: "affinity-pv2",
+			claim: makePVC("100G", func(pvc *v1.PersistentVolumeClaim) {
+				pvc.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
+				pvc.Spec.StorageClassName = &classWait
+			}),
+			node:            node1,
+			excludedVolumes: map[string]*v1.PersistentVolume{"affinity001": nil},
+		},
+		"fail-exclusion": {
+			expectedMatch: "",
+			claim: makePVC("100G", func(pvc *v1.PersistentVolumeClaim) {
+				pvc.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
+				pvc.Spec.StorageClassName = &classWait
+			}),
+			node:            node1,
+			excludedVolumes: map[string]*v1.PersistentVolume{"affinity001": nil, "affinity002": nil},
+		},
+		"fail-accessmode": {
+			expectedMatch: "",
+			claim: makePVC("100G", func(pvc *v1.PersistentVolumeClaim) {
+				pvc.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteMany}
+				pvc.Spec.StorageClassName = &classWait
+			}),
+			node: node1,
+		},
+		"fail-nodeaffinity": {
+			expectedMatch: "",
+			claim: makePVC("100G", func(pvc *v1.PersistentVolumeClaim) {
+				pvc.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
+				pvc.Spec.StorageClassName = &classWait
+			}),
+			node: node2,
+		},
+		"fail-prebound-node-affinity": {
+			expectedMatch: "",
+			claim: makePVC("100G", func(pvc *v1.PersistentVolumeClaim) {
+				pvc.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
+				pvc.Spec.StorageClassName = &classWait
+				pvc.Name = "claim02"
+			}),
+			node: node2,
+		},
+		"success-bad-and-good-node-affinity": {
+			expectedMatch: "affinity-pv3",
+			claim: makePVC("100G", func(pvc *v1.PersistentVolumeClaim) {
+				pvc.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
+				pvc.Spec.StorageClassName = &classWait
+				pvc.Name = "claim03"
+			}),
+			node: node3,
+		},
+	}
+
+	for name, scenario := range scenarios {
+		volume, err := findMatchingVolume(scenario.claim, volumes, scenario.node, scenario.excludedVolumes, true)
+		if err != nil {
+			t.Errorf("Unexpected error matching volume by claim: %v", err)
+		}
+		if len(scenario.expectedMatch) != 0 && volume == nil {
+			t.Errorf("Expected match but received nil volume for scenario: %s", name)
+		}
+		if len(scenario.expectedMatch) != 0 && volume != nil && string(volume.UID) != scenario.expectedMatch {
+			t.Errorf("Expected %s but got volume %s in scenario %s", scenario.expectedMatch, volume.UID, name)
+		}
+		if len(scenario.expectedMatch) == 0 && volume != nil {
+			t.Errorf("Unexpected match for scenario: %s, matched with %s instead", name, volume.UID)
+		}
+	}
+}
+
+func TestCheckAccessModes(t *testing.T) {
+	volume := &v1.PersistentVolume{
+		Spec: v1.PersistentVolumeSpec{
+			AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce, v1.ReadWriteMany},
+		},
+	}
+
+	scenarios := map[string]struct {
+		shouldSucceed bool
+		claim         *v1.PersistentVolumeClaim
+	}{
+		"success-single-mode": {
+			shouldSucceed: true,
+			claim: makePVC("100G", func(pvc *v1.PersistentVolumeClaim) {
+				pvc.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteMany}
+			}),
+		},
+		"success-many-modes": {
+			shouldSucceed: true,
+			claim: makePVC("100G", func(pvc *v1.PersistentVolumeClaim) {
+				pvc.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteMany, v1.ReadWriteOnce}
+			}),
+		},
+		"fail-single-mode": {
+			shouldSucceed: false,
+			claim: makePVC("100G", func(pvc *v1.PersistentVolumeClaim) {
+				pvc.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadOnlyMany}
+			}),
+		},
+		"fail-many-modes": {
+			shouldSucceed: false,
+			claim: makePVC("100G", func(pvc *v1.PersistentVolumeClaim) {
+				pvc.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteMany, v1.ReadOnlyMany}
+			}),
+		},
+	}
+
+	for name, scenario := range scenarios {
+		result := checkAccessModes(scenario.claim, volume)
+		if result != scenario.shouldSucceed {
+			t.Errorf("Test %q failed: Expected %v, got %v", name, scenario.shouldSucceed, result)
+		}
+	}
+}
+
 // byCapacity is used to order volumes by ascending storage size
 type byCapacity struct {
 	volumes []*v1.PersistentVolume
diff --git a/pkg/controller/volume/persistentvolume/pv_controller.go b/pkg/controller/volume/persistentvolume/pv_controller.go
index 8819a681a84..980d960c750 100644
--- a/pkg/controller/volume/persistentvolume/pv_controller.go
+++ b/pkg/controller/volume/persistentvolume/pv_controller.go
@@ -26,6 +26,7 @@ import (
 	storage "k8s.io/api/storage/v1"
 	apierrs "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/kubernetes/scheme"
 	corelisters "k8s.io/client-go/listers/core/v1"
@@ -37,6 +38,7 @@ import (
 	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
 	"k8s.io/kubernetes/pkg/cloudprovider"
 	"k8s.io/kubernetes/pkg/controller/volume/events"
+	"k8s.io/kubernetes/pkg/features"
 	"k8s.io/kubernetes/pkg/util/goroutinemap"
 	"k8s.io/kubernetes/pkg/util/goroutinemap/exponentialbackoff"
 	vol "k8s.io/kubernetes/pkg/volume"
@@ -254,6 +256,30 @@ func checkVolumeSatisfyClaim(volume *v1.PersistentVolume, claim *v1.PersistentVo
 	return nil
 }
 
+func (ctrl *PersistentVolumeController) shouldDelayBinding(claim *v1.PersistentVolumeClaim) (bool, error) {
+	if !utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) {
+		return false, nil
+	}
+
+	className := v1helper.GetPersistentVolumeClaimClass(claim)
+	if className == "" {
+		return false, nil
+	}
+
+	class, err := ctrl.classLister.Get(className)
+	if err != nil {
+		return false, nil
+	}
+
+	if class.VolumeBindingMode == nil {
+		return false, fmt.Errorf("VolumeBindingMode not set for StorageClass %q", className)
+	}
+
+	// TODO: add check to handle dynamic provisioning later
+
+	return *class.VolumeBindingMode == storage.VolumeBindingWaitForFirstConsumer, nil
+}
+
 // syncUnboundClaim is the main controller method to decide what to do with an
 // unbound claim.
 func (ctrl *PersistentVolumeController) syncUnboundClaim(claim *v1.PersistentVolumeClaim) error {
@@ -261,9 +287,13 @@ func (ctrl *PersistentVolumeController) syncUnboundClaim(claim *v1.PersistentVol
 	// OBSERVATION: pvc is "Pending"
 	if claim.Spec.VolumeName == "" {
 		// User did not care which PV they get.
+		delayBinding, err := ctrl.shouldDelayBinding(claim)
+		if err != nil {
+			return err
+		}
 
 		// [Unit test set 1]
-		volume, err := ctrl.volumes.findBestMatchForClaim(claim)
+		volume, err := ctrl.volumes.findBestMatchForClaim(claim, delayBinding)
 		if err != nil {
 			glog.V(2).Infof("synchronizing unbound PersistentVolumeClaim[%s]: Error finding PV for claim: %v", claimToClaimKey(claim), err)
 			return fmt.Errorf("Error finding PV for claim %q: %v", claimToClaimKey(claim), err)
@@ -272,15 +302,21 @@ func (ctrl *PersistentVolumeController) syncUnboundClaim(claim *v1.PersistentVol
 			glog.V(4).Infof("synchronizing unbound PersistentVolumeClaim[%s]: no volume found", claimToClaimKey(claim))
 			// No PV could be found
 			// OBSERVATION: pvc is "Pending", will retry
-			if v1helper.GetPersistentVolumeClaimClass(claim) != "" {
+			switch {
+			case delayBinding:
+				// TODO: Skip dynamic provisioning for now
+				ctrl.eventRecorder.Event(claim, v1.EventTypeNormal, events.WaitForFirstConsumer, "waiting for first consumer to be created before binding")
+			case v1helper.GetPersistentVolumeClaimClass(claim) != "":
 				if err = ctrl.provisionClaim(claim); err != nil {
 					return err
 				}
 				return nil
+			default:
+				ctrl.eventRecorder.Event(claim, v1.EventTypeNormal, events.FailedBinding, "no persistent volumes available for this claim and no storage class is set")
 			}
+
 			// Mark the claim as Pending and try to find a match in the next
 			// periodic syncClaim
-			ctrl.eventRecorder.Event(claim, v1.EventTypeNormal, events.FailedBinding, "no persistent volumes available for this claim and no storage class is set")
 			if _, err = ctrl.updateClaimStatus(claim, v1.ClaimPending, nil); err != nil {
 				return err
 			}
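For context, shouldDelayBinding returns true only for claims whose StorageClass sets volumeBindingMode to WaitForFirstConsumer while the VolumeScheduling gate is on. A minimal sketch of such a class using the same API types (the name is a placeholder; kubernetes.io/no-provisioner is the usual provisioner value for pre-created local volumes):

	mode := storage.VolumeBindingWaitForFirstConsumer
	class := &storage.StorageClass{
		ObjectMeta:        metav1.ObjectMeta{Name: "local-wait"}, // placeholder name
		Provisioner:       "kubernetes.io/no-provisioner",        // no dynamic provisioning
		VolumeBindingMode: &mode,
	}
	// Claims referencing this class are left Pending by syncUnboundClaim,
	// with a WaitForFirstConsumer event, instead of being bound or provisioned.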
diff --git a/pkg/controller/volume/persistentvolume/pv_controller_test.go b/pkg/controller/volume/persistentvolume/pv_controller_test.go
index 41d5ce4b831..5454fe26ece 100644
--- a/pkg/controller/volume/persistentvolume/pv_controller_test.go
+++ b/pkg/controller/volume/persistentvolume/pv_controller_test.go
@@ -21,8 +21,12 @@ import (
 	"time"
 
 	"github.com/golang/glog"
+
 	"k8s.io/api/core/v1"
+	storagev1 "k8s.io/api/storage/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/watch"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	"k8s.io/client-go/informers"
 	"k8s.io/client-go/kubernetes/fake"
 	core "k8s.io/client-go/testing"
@@ -232,3 +236,106 @@ func addVolumeAnnotation(volume *v1.PersistentVolume, annName, annValue string)
 	volume.Annotations[annName] = annValue
 	return volume
 }
+
+func makePVCClass(scName *string) *v1.PersistentVolumeClaim {
+	return &v1.PersistentVolumeClaim{
+		Spec: v1.PersistentVolumeClaimSpec{
+			StorageClassName: scName,
+		},
+	}
+}
+
+func makeStorageClass(scName string, mode *storagev1.VolumeBindingMode) *storagev1.StorageClass {
+	return &storagev1.StorageClass{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: scName,
+		},
+		VolumeBindingMode: mode,
+	}
+}
+
+func TestDelayBinding(t *testing.T) {
+	var (
+		classNotHere       = "not-here"
+		classNoMode        = "no-mode"
+		classImmediateMode = "immediate-mode"
+		classWaitMode      = "wait-mode"
+
+		modeImmediate = storagev1.VolumeBindingImmediate
+		modeWait      = storagev1.VolumeBindingWaitForFirstConsumer
+	)
+
+	tests := map[string]struct {
+		pvc         *v1.PersistentVolumeClaim
+		shouldDelay bool
+		shouldFail  bool
+	}{
+		"nil-class": {
+			pvc:         makePVCClass(nil),
+			shouldDelay: false,
+		},
+		"class-not-found": {
+			pvc:         makePVCClass(&classNotHere),
+			shouldDelay: false,
+		},
+		"no-mode-class": {
+			pvc:         makePVCClass(&classNoMode),
+			shouldDelay: false,
+			shouldFail:  true,
+		},
+		"immediate-mode-class": {
+			pvc:         makePVCClass(&classImmediateMode),
+			shouldDelay: false,
+		},
+		"wait-mode-class": {
+			pvc:         makePVCClass(&classWaitMode),
+			shouldDelay: true,
+		},
+	}
+
+	classes := []*storagev1.StorageClass{
+		makeStorageClass(classNoMode, nil),
+		makeStorageClass(classImmediateMode, &modeImmediate),
+		makeStorageClass(classWaitMode, &modeWait),
+	}
+
+	client := &fake.Clientset{}
+	informerFactory := informers.NewSharedInformerFactory(client, controller.NoResyncPeriodFunc())
+	classInformer := informerFactory.Storage().V1().StorageClasses()
+	ctrl := &PersistentVolumeController{
+		classLister: classInformer.Lister(),
+	}
+
+	for _, class := range classes {
+		if err := classInformer.Informer().GetIndexer().Add(class); err != nil {
+			t.Fatalf("Failed to add storage class %q: %v", class.Name, err)
+		}
+	}
+
+	// When the feature gate is disabled, binding should never be delayed
+	name := "feature-disabled"
+	shouldDelay, err := ctrl.shouldDelayBinding(makePVCClass(&classWaitMode))
+	if err != nil {
+		t.Errorf("Test %q returned error: %v", name, err)
+	}
+	if shouldDelay {
+		t.Errorf("Test %q returned true, expected false", name)
+	}
+
+	// Enable the feature gate
+	utilfeature.DefaultFeatureGate.Set("VolumeScheduling=true")
+	defer utilfeature.DefaultFeatureGate.Set("VolumeScheduling=false")
+
+	for name, test := range tests {
+		shouldDelay, err = ctrl.shouldDelayBinding(test.pvc)
+		if err != nil && !test.shouldFail {
+			t.Errorf("Test %q returned error: %v", name, err)
+		}
+		if err == nil && test.shouldFail {
+			t.Errorf("Test %q returned success, expected error", name)
+		}
+		if shouldDelay != test.shouldDelay {
+			t.Errorf("Test %q: expected shouldDelay %v, got %v", name, test.shouldDelay, shouldDelay)
+		}
+	}
+}
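Tying the pieces together from the claim side: with the gate enabled and a WaitForFirstConsumer class registered, the controller neither binds nor provisions the claim (compare test "1-13 - delayed binding" in binder_test.go). A sketch in the style of the tests above, assuming ctrl is wired up as in TestDelayBinding:

	waitClass := "wait-mode"
	claim := makePVCClass(&waitClass)
	delay, err := ctrl.shouldDelayBinding(claim)
	if err != nil {
		t.Fatalf("shouldDelayBinding failed: %v", err)
	}
	if !delay {
		t.Fatalf("Expected binding for class %q to be delayed", waitClass)
	}
	// syncUnboundClaim would now emit events.WaitForFirstConsumer and leave
	// the claim Pending instead of calling provisionClaim.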