From bd10664851c752438434810dbc845062960242c7 Mon Sep 17 00:00:00 2001
From: Huamin Chen
Date: Tue, 20 Oct 2015 13:24:23 -0400
Subject: [PATCH 1/3] rbd: support NoDiskConflicts scheduler predicates

Signed-off-by: Huamin Chen
---
 .../algorithm/predicates/predicates.go      | 31 ++++++++++-
 .../algorithm/predicates/predicates_test.go | 55 +++++++++++++++++++
 2 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/plugin/pkg/scheduler/algorithm/predicates/predicates.go b/plugin/pkg/scheduler/algorithm/predicates/predicates.go
index b1fff016da5..b9e0cf72ff6 100644
--- a/plugin/pkg/scheduler/algorithm/predicates/predicates.go
+++ b/plugin/pkg/scheduler/algorithm/predicates/predicates.go
@@ -76,13 +76,30 @@ func isVolumeConflict(volume api.Volume, pod *api.Pod) bool {
 			}
 		}
 	}
+	if volume.RBD != nil {
+		mon := volume.RBD.CephMonitors
+		pool := volume.RBD.RBDPool
+		image := volume.RBD.RBDImage
+
+		manifest := &(pod.Spec)
+		for ix := range manifest.Volumes {
+			if manifest.Volumes[ix].RBD != nil {
+				mon_m := manifest.Volumes[ix].RBD.CephMonitors
+				pool_m := manifest.Volumes[ix].RBD.RBDPool
+				image_m := manifest.Volumes[ix].RBD.RBDImage
+				if haveSame(mon, mon_m) && pool_m == pool && image_m == image {
+					return true
+				}
+			}
+		}
+	}
 	return false
 }
 
 // NoDiskConflict evaluates if a pod can fit due to the volumes it requests, and those that
 // are already mounted. Some times of volumes are mounted onto node machines. For now, these mounts
 // are exclusive so if there is already a volume mounted on that node, another pod can't schedule
-// there. This is GCE and Amazon EBS specific for now.
+// there. This is GCE, Amazon EBS, and Ceph RBD specific for now.
 // TODO: migrate this into some per-volume specific code?
 func NoDiskConflict(pod *api.Pod, existingPods []*api.Pod, node string) (bool, error) {
 	manifest := &(pod.Spec)
@@ -412,3 +429,15 @@ func MapPodsToMachines(lister algorithm.PodLister) (map[string][]*api.Pod, error
 	}
 	return machineToPods, nil
 }
+
+// search two arrays and return true if they have at least one common element; return false otherwise
+func haveSame(a1, a2 []string) bool {
+	for _, val1 := range a1 {
+		for _, val2 := range a2 {
+			if val1 == val2 {
+				return true
+			}
+		}
+	}
+	return false
+}
diff --git a/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go b/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go
index 0942a40b99a..5dc1a148cee 100644
--- a/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go
+++ b/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go
@@ -435,6 +435,61 @@ func TestAWSDiskConflicts(t *testing.T) {
 	}
 }
 
+func TestRBDDiskConflicts(t *testing.T) {
+	volState := api.PodSpec{
+		Volumes: []api.Volume{
+			{
+				VolumeSource: api.VolumeSource{
+					RBD: &api.RBDVolumeSource{
+						CephMonitors: []string{"a", "b"},
+						RBDPool:      "foo",
+						RBDImage:     "bar",
+						FSType:       "ext4",
+					},
+				},
+			},
+		},
+	}
+	volState2 := api.PodSpec{
+		Volumes: []api.Volume{
+			{
+				VolumeSource: api.VolumeSource{
+					RBD: &api.RBDVolumeSource{
+						CephMonitors: []string{"c", "d"},
+						RBDPool:      "foo",
+						RBDImage:     "bar",
+						FSType:       "ext4",
+					},
+				},
+			},
+		},
+	}
+	tests := []struct {
+		pod          *api.Pod
+		existingPods []*api.Pod
+		isOk         bool
+		test         string
+	}{
+		{&api.Pod{}, []*api.Pod{}, true, "nothing"},
+		{&api.Pod{}, []*api.Pod{{Spec: volState}}, true, "one state"},
+		{&api.Pod{Spec: volState}, []*api.Pod{{Spec: volState}}, false, "same state"},
+		{&api.Pod{Spec: volState2}, []*api.Pod{{Spec: volState}}, true, "different state"},
+	}
+
+	for _, test := range tests {
+		ok, err := NoDiskConflict(test.pod, test.existingPods, "machine")
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if test.isOk && !ok {
+			t.Errorf("expected ok, got none. %v %v %s", test.pod, test.existingPods, test.test)
+		}
+		if !test.isOk && ok {
+			t.Errorf("expected no ok, got one. %v %v %s", test.pod, test.existingPods, test.test)
+		}
+	}
+}
+
 func TestPodFitsSelector(t *testing.T) {
 	tests := []struct {
 		pod *api.Pod

From 1ec9829ddfcf6b46acda74afb62ea6748a4efc74 Mon Sep 17 00:00:00 2001
From: Huamin Chen
Date: Thu, 22 Oct 2015 09:28:30 -0400
Subject: [PATCH 2/3] replace variable manifest to podSpec to make names unconfusing; update NoDiskConflicts comments

Signed-off-by: Huamin Chen
---
 .../algorithm/predicates/predicates.go | 41 +++++++++----------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/plugin/pkg/scheduler/algorithm/predicates/predicates.go b/plugin/pkg/scheduler/algorithm/predicates/predicates.go
index b9e0cf72ff6..fc6e413fe8d 100644
--- a/plugin/pkg/scheduler/algorithm/predicates/predicates.go
+++ b/plugin/pkg/scheduler/algorithm/predicates/predicates.go
@@ -56,11 +56,11 @@ func isVolumeConflict(volume api.Volume, pod *api.Pod) bool {
 	if volume.GCEPersistentDisk != nil {
 		disk := volume.GCEPersistentDisk
 
-		manifest := &(pod.Spec)
-		for ix := range manifest.Volumes {
-			if manifest.Volumes[ix].GCEPersistentDisk != nil &&
-				manifest.Volumes[ix].GCEPersistentDisk.PDName == disk.PDName &&
-				!(manifest.Volumes[ix].GCEPersistentDisk.ReadOnly && disk.ReadOnly) {
+		existingPods := &(pod.Spec)
+		for ix := range existingPods.Volumes {
+			if existingPods.Volumes[ix].GCEPersistentDisk != nil &&
+				existingPods.Volumes[ix].GCEPersistentDisk.PDName == disk.PDName &&
+				!(existingPods.Volumes[ix].GCEPersistentDisk.ReadOnly && disk.ReadOnly) {
 				return true
 			}
 		}
@@ -68,10 +68,10 @@ func isVolumeConflict(volume api.Volume, pod *api.Pod) bool {
 	if volume.AWSElasticBlockStore != nil {
 		volumeID := volume.AWSElasticBlockStore.VolumeID
 
-		manifest := &(pod.Spec)
-		for ix := range manifest.Volumes {
-			if manifest.Volumes[ix].AWSElasticBlockStore != nil &&
-				manifest.Volumes[ix].AWSElasticBlockStore.VolumeID == volumeID {
+		existingPods := &(pod.Spec)
+		for ix := range existingPods.Volumes {
+			if existingPods.Volumes[ix].AWSElasticBlockStore != nil &&
+				existingPods.Volumes[ix].AWSElasticBlockStore.VolumeID == volumeID {
 				return true
 			}
 		}
@@ -81,12 +81,12 @@ func isVolumeConflict(volume api.Volume, pod *api.Pod) bool {
 		pool := volume.RBD.RBDPool
 		image := volume.RBD.RBDImage
 
-		manifest := &(pod.Spec)
-		for ix := range manifest.Volumes {
-			if manifest.Volumes[ix].RBD != nil {
-				mon_m := manifest.Volumes[ix].RBD.CephMonitors
-				pool_m := manifest.Volumes[ix].RBD.RBDPool
-				image_m := manifest.Volumes[ix].RBD.RBDImage
+		existingPods := &(pod.Spec)
+		for ix := range existingPods.Volumes {
+			if existingPods.Volumes[ix].RBD != nil {
+				mon_m := existingPods.Volumes[ix].RBD.CephMonitors
+				pool_m := existingPods.Volumes[ix].RBD.RBDPool
+				image_m := existingPods.Volumes[ix].RBD.RBDImage
 				if haveSame(mon, mon_m) && pool_m == pool && image_m == image {
 					return true
 				}
@@ -97,15 +97,14 @@ func isVolumeConflict(volume api.Volume, pod *api.Pod) bool {
 }
 
 // NoDiskConflict evaluates if a pod can fit due to the volumes it requests, and those that
-// are already mounted. Some times of volumes are mounted onto node machines. For now, these mounts
-// are exclusive so if there is already a volume mounted on that node, another pod can't schedule
-// there. This is GCE, Amazon EBS, and Ceph RBD specific for now.
+// are already mounted. If there is already a volume mounted on that node, another pod that uses the same volume
+// can't be scheduled there. This is GCE, Amazon EBS, and Ceph RBD specific for now.
 // TODO: migrate this into some per-volume specific code?
 func NoDiskConflict(pod *api.Pod, existingPods []*api.Pod, node string) (bool, error) {
-	manifest := &(pod.Spec)
-	for ix := range manifest.Volumes {
+	podSpec := &(pod.Spec)
+	for ix := range podSpec.Volumes {
 		for podIx := range existingPods {
-			if isVolumeConflict(manifest.Volumes[ix], existingPods[podIx]) {
+			if isVolumeConflict(podSpec.Volumes[ix], existingPods[podIx]) {
 				return false, nil
 			}
 		}

From bcbdd442675b07df9625b3e64da44042b5c5d057 Mon Sep 17 00:00:00 2001
From: Huamin Chen
Date: Mon, 2 Nov 2015 10:18:39 -0500
Subject: [PATCH 3/3] rbd NoDiskConflict predicate: review feedback

Signed-off-by: Huamin Chen
---
 .../algorithm/predicates/predicates.go | 36 ++++++++++---------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/plugin/pkg/scheduler/algorithm/predicates/predicates.go b/plugin/pkg/scheduler/algorithm/predicates/predicates.go
index fc6e413fe8d..29c90844a5f 100644
--- a/plugin/pkg/scheduler/algorithm/predicates/predicates.go
+++ b/plugin/pkg/scheduler/algorithm/predicates/predicates.go
@@ -56,11 +56,11 @@ func isVolumeConflict(volume api.Volume, pod *api.Pod) bool {
 	if volume.GCEPersistentDisk != nil {
 		disk := volume.GCEPersistentDisk
 
-		existingPods := &(pod.Spec)
-		for ix := range existingPods.Volumes {
-			if existingPods.Volumes[ix].GCEPersistentDisk != nil &&
-				existingPods.Volumes[ix].GCEPersistentDisk.PDName == disk.PDName &&
-				!(existingPods.Volumes[ix].GCEPersistentDisk.ReadOnly && disk.ReadOnly) {
+		existingPod := &(pod.Spec)
+		for ix := range existingPod.Volumes {
+			if existingPod.Volumes[ix].GCEPersistentDisk != nil &&
+				existingPod.Volumes[ix].GCEPersistentDisk.PDName == disk.PDName &&
+				!(existingPod.Volumes[ix].GCEPersistentDisk.ReadOnly && disk.ReadOnly) {
 				return true
 			}
 		}
@@ -68,10 +68,10 @@ func isVolumeConflict(volume api.Volume, pod *api.Pod) bool {
 	if volume.AWSElasticBlockStore != nil {
 		volumeID := volume.AWSElasticBlockStore.VolumeID
 
-		existingPods := &(pod.Spec)
-		for ix := range existingPods.Volumes {
-			if existingPods.Volumes[ix].AWSElasticBlockStore != nil &&
-				existingPods.Volumes[ix].AWSElasticBlockStore.VolumeID == volumeID {
+		existingPod := &(pod.Spec)
+		for ix := range existingPod.Volumes {
+			if existingPod.Volumes[ix].AWSElasticBlockStore != nil &&
+				existingPod.Volumes[ix].AWSElasticBlockStore.VolumeID == volumeID {
 				return true
 			}
 		}
@@ -81,12 +81,12 @@ func isVolumeConflict(volume api.Volume, pod *api.Pod) bool {
 		pool := volume.RBD.RBDPool
 		image := volume.RBD.RBDImage
 
-		existingPods := &(pod.Spec)
-		for ix := range existingPods.Volumes {
-			if existingPods.Volumes[ix].RBD != nil {
-				mon_m := existingPods.Volumes[ix].RBD.CephMonitors
-				pool_m := existingPods.Volumes[ix].RBD.RBDPool
-				image_m := existingPods.Volumes[ix].RBD.RBDImage
+		existingPod := &(pod.Spec)
+		for ix := range existingPod.Volumes {
+			if existingPod.Volumes[ix].RBD != nil {
+				mon_m := existingPod.Volumes[ix].RBD.CephMonitors
+				pool_m := existingPod.Volumes[ix].RBD.RBDPool
+				image_m := existingPod.Volumes[ix].RBD.RBDImage
 				if haveSame(mon, mon_m) && pool_m == pool && image_m == image {
 					return true
 				}
@@ -98,7 +98,11 @@ func isVolumeConflict(volume api.Volume, pod *api.Pod) bool {
 
 // NoDiskConflict evaluates if a pod can fit due to the volumes it requests, and those that
 // are already mounted. If there is already a volume mounted on that node, another pod that uses the same volume
-// can't be scheduled there. This is GCE, Amazon EBS, and Ceph RBD specific for now.
+// can't be scheduled there.
+// This is GCE, Amazon EBS, and Ceph RBD specific for now:
+// - GCE PD allows multiple mounts as long as they're all read-only
+// - AWS EBS forbids any two pods mounting the same volume ID
+// - Ceph RBD forbids if any two pods share at least same monitor, and match pool and image.
 // TODO: migrate this into some per-volume specific code?
 func NoDiskConflict(pod *api.Pod, existingPods []*api.Pod, node string) (bool, error) {
 	podSpec := &(pod.Spec)
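
To make the RBD rule described in the comment above concrete without pulling in the Kubernetes tree, here is a minimal, self-contained Go sketch. The rbdVolume struct and rbdConflict function are hypothetical stand-ins invented for this illustration (the real predicate works on api.Volume and api.RBDVolumeSource); haveSame mirrors the helper introduced in PATCH 1/3.

// Illustrative sketch only; not part of the patches above.
package main

import "fmt"

// rbdVolume is a simplified, hypothetical stand-in for api.RBDVolumeSource.
type rbdVolume struct {
	CephMonitors []string
	Pool, Image  string
}

// haveSame reports whether two string slices share at least one element,
// the same check the patch's haveSame helper performs.
func haveSame(a1, a2 []string) bool {
	for _, v1 := range a1 {
		for _, v2 := range a2 {
			if v1 == v2 {
				return true
			}
		}
	}
	return false
}

// rbdConflict applies the RBD rule from isVolumeConflict: two volumes conflict
// when they share any Ceph monitor and reference the same pool and image.
func rbdConflict(a, b rbdVolume) bool {
	return haveSame(a.CephMonitors, b.CephMonitors) && a.Pool == b.Pool && a.Image == b.Image
}

func main() {
	requested := rbdVolume{CephMonitors: []string{"a", "b"}, Pool: "foo", Image: "bar"}
	mounted := rbdVolume{CephMonitors: []string{"b", "c"}, Pool: "foo", Image: "bar"}
	other := rbdVolume{CephMonitors: []string{"c", "d"}, Pool: "foo", Image: "bar"}

	fmt.Println(rbdConflict(requested, mounted)) // true: monitor "b" is shared and pool/image match
	fmt.Println(rbdConflict(requested, other))   // false: no common monitor
}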
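
Two details of the predicate are worth noting, and both come from the patches rather than from this sketch: the GCE PD branch only reports a conflict when at least one of the two mounts is writable, while the RBD branch treats any overlap in the Ceph monitor lists as pointing at the same cluster (via haveSame) and then requires the pool and image to match. That appears to be why the "different state" case in TestRBDDiskConflicts, whose monitor lists are disjoint, is expected to schedule.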