diff --git a/plugin/pkg/scheduler/algorithm/predicates/predicates.go b/plugin/pkg/scheduler/algorithm/predicates/predicates.go
index 486513f9c45..44747176be5 100644
--- a/plugin/pkg/scheduler/algorithm/predicates/predicates.go
+++ b/plugin/pkg/scheduler/algorithm/predicates/predicates.go
@@ -56,11 +56,11 @@ func isVolumeConflict(volume api.Volume, pod *api.Pod) bool {
 	if volume.GCEPersistentDisk != nil {
 		disk := volume.GCEPersistentDisk
-		manifest := &(pod.Spec)
-		for ix := range manifest.Volumes {
-			if manifest.Volumes[ix].GCEPersistentDisk != nil &&
-				manifest.Volumes[ix].GCEPersistentDisk.PDName == disk.PDName &&
-				!(manifest.Volumes[ix].GCEPersistentDisk.ReadOnly && disk.ReadOnly) {
+		existingPod := &(pod.Spec)
+		for ix := range existingPod.Volumes {
+			if existingPod.Volumes[ix].GCEPersistentDisk != nil &&
+				existingPod.Volumes[ix].GCEPersistentDisk.PDName == disk.PDName &&
+				!(existingPod.Volumes[ix].GCEPersistentDisk.ReadOnly && disk.ReadOnly) {
 				return true
 			}
 		}
@@ -68,27 +68,47 @@ func isVolumeConflict(volume api.Volume, pod *api.Pod) bool {
 	if volume.AWSElasticBlockStore != nil {
 		volumeID := volume.AWSElasticBlockStore.VolumeID
-		manifest := &(pod.Spec)
-		for ix := range manifest.Volumes {
-			if manifest.Volumes[ix].AWSElasticBlockStore != nil &&
-				manifest.Volumes[ix].AWSElasticBlockStore.VolumeID == volumeID {
+		existingPod := &(pod.Spec)
+		for ix := range existingPod.Volumes {
+			if existingPod.Volumes[ix].AWSElasticBlockStore != nil &&
+				existingPod.Volumes[ix].AWSElasticBlockStore.VolumeID == volumeID {
 				return true
 			}
 		}
 	}
+	if volume.RBD != nil {
+		mon := volume.RBD.CephMonitors
+		pool := volume.RBD.RBDPool
+		image := volume.RBD.RBDImage
+
+		existingPod := &(pod.Spec)
+		for ix := range existingPod.Volumes {
+			if existingPod.Volumes[ix].RBD != nil {
+				mon_m := existingPod.Volumes[ix].RBD.CephMonitors
+				pool_m := existingPod.Volumes[ix].RBD.RBDPool
+				image_m := existingPod.Volumes[ix].RBD.RBDImage
+				if haveSame(mon, mon_m) && pool_m == pool && image_m == image {
+					return true
+				}
+			}
+		}
+	}
 	return false
 }
 
 // NoDiskConflict evaluates if a pod can fit due to the volumes it requests, and those that
-// are already mounted. Some times of volumes are mounted onto node machines. For now, these mounts
-// are exclusive so if there is already a volume mounted on that node, another pod can't schedule
-// there. This is GCE and Amazon EBS specific for now.
+// are already mounted. If a volume is already mounted on the node, another pod that uses
+// the same volume can't be scheduled there.
+// This is GCE, Amazon EBS, and Ceph RBD specific for now:
+// - GCE PD allows multiple mounts as long as they're all read-only
+// - AWS EBS forbids any two pods mounting the same volume ID
+// - Ceph RBD forbids any two pods that share at least one monitor and use the same pool and image
 // TODO: migrate this into some per-volume specific code?
 func NoDiskConflict(pod *api.Pod, existingPods []*api.Pod, node string) (bool, error) {
-	manifest := &(pod.Spec)
-	for ix := range manifest.Volumes {
+	podSpec := &(pod.Spec)
+	for ix := range podSpec.Volumes {
 		for podIx := range existingPods {
-			if isVolumeConflict(manifest.Volumes[ix], existingPods[podIx]) {
+			if isVolumeConflict(podSpec.Volumes[ix], existingPods[podIx]) {
 				return false, nil
 			}
 		}
@@ -416,3 +436,15 @@ func MapPodsToMachines(lister algorithm.PodLister) (map[string][]*api.Pod, error
 	}
 	return machineToPods, nil
 }
+
+// haveSame returns true if the two string slices have at least one element in common; false otherwise.
+func haveSame(a1, a2 []string) bool {
+	for _, val1 := range a1 {
+		for _, val2 := range a2 {
+			if val1 == val2 {
+				return true
+			}
+		}
+	}
+	return false
+}
diff --git a/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go b/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go
index 0942a40b99a..5dc1a148cee 100644
--- a/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go
+++ b/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go
@@ -435,6 +435,61 @@ func TestAWSDiskConflicts(t *testing.T) {
 	}
 }
 
+func TestRBDDiskConflicts(t *testing.T) {
+	volState := api.PodSpec{
+		Volumes: []api.Volume{
+			{
+				VolumeSource: api.VolumeSource{
+					RBD: &api.RBDVolumeSource{
+						CephMonitors: []string{"a", "b"},
+						RBDPool:      "foo",
+						RBDImage:     "bar",
+						FSType:       "ext4",
+					},
+				},
+			},
+		},
+	}
+	volState2 := api.PodSpec{
+		Volumes: []api.Volume{
+			{
+				VolumeSource: api.VolumeSource{
+					RBD: &api.RBDVolumeSource{
+						CephMonitors: []string{"c", "d"},
+						RBDPool:      "foo",
+						RBDImage:     "bar",
+						FSType:       "ext4",
+					},
+				},
+			},
+		},
+	}
+	tests := []struct {
+		pod          *api.Pod
+		existingPods []*api.Pod
+		isOk         bool
+		test         string
+	}{
+		{&api.Pod{}, []*api.Pod{}, true, "nothing"},
+		{&api.Pod{}, []*api.Pod{{Spec: volState}}, true, "one state"},
+		{&api.Pod{Spec: volState}, []*api.Pod{{Spec: volState}}, false, "same state"},
+		{&api.Pod{Spec: volState2}, []*api.Pod{{Spec: volState}}, true, "different state"},
+	}
+
+	for _, test := range tests {
+		ok, err := NoDiskConflict(test.pod, test.existingPods, "machine")
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if test.isOk && !ok {
+			t.Errorf("expected ok, got none. %v %v %s", test.pod, test.existingPods, test.test)
+		}
+		if !test.isOk && ok {
+			t.Errorf("expected no ok, got one. %v %v %s", test.pod, test.existingPods, test.test)
+		}
+	}
+}
+
 func TestPodFitsSelector(t *testing.T) {
 	tests := []struct {
 		pod *api.Pod
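
For reference, a minimal, self-contained Go sketch (not part of the change above) illustrating the RBD conflict rule that the new isVolumeConflict branch implements: two RBD volumes conflict only when their monitor lists overlap and both pool and image match. The rbdVolume and rbdConflict names are illustration-only stand-ins for api.RBDVolumeSource and the RBD branch of isVolumeConflict; haveSame mirrors the helper added in the diff.

package main

import "fmt"

// rbdVolume is a simplified stand-in for api.RBDVolumeSource, carrying only
// the fields the conflict check looks at.
type rbdVolume struct {
	CephMonitors []string
	RBDPool      string
	RBDImage     string
}

// haveSame returns true if the two string slices share at least one element,
// matching the helper added to predicates.go.
func haveSame(a1, a2 []string) bool {
	for _, v1 := range a1 {
		for _, v2 := range a2 {
			if v1 == v2 {
				return true
			}
		}
	}
	return false
}

// rbdConflict applies the same rule as the RBD branch of isVolumeConflict:
// overlapping monitors plus identical pool and image means a conflict.
func rbdConflict(a, b rbdVolume) bool {
	return haveSame(a.CephMonitors, b.CephMonitors) &&
		a.RBDPool == b.RBDPool &&
		a.RBDImage == b.RBDImage
}

func main() {
	requested := rbdVolume{CephMonitors: []string{"a", "b"}, RBDPool: "foo", RBDImage: "bar"}
	mounted := rbdVolume{CephMonitors: []string{"b", "c"}, RBDPool: "foo", RBDImage: "bar"}
	other := rbdVolume{CephMonitors: []string{"c", "d"}, RBDPool: "foo", RBDImage: "bar"}

	fmt.Println(rbdConflict(requested, mounted)) // true: monitor "b" is shared, pool and image match
	fmt.Println(rbdConflict(requested, other))   // false: no common monitor, so no conflict
}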