diff --git a/pkg/scheduler/predicates.go b/pkg/scheduler/predicates.go
index 74628af4b84..7c95a3dd58d 100644
--- a/pkg/scheduler/predicates.go
+++ b/pkg/scheduler/predicates.go
@@ -51,6 +51,39 @@ func (nodes ClientNodeInfo) GetNodeInfo(nodeID string) (*api.Minion, error) {
 	return nodes.GetMinion(nodeID)
 }
 
+func isVolumeConflict(volume api.Volume, pod *api.Pod) bool {
+	if volume.Source.GCEPersistentDisk == nil {
+		return false
+	}
+	pdName := volume.Source.GCEPersistentDisk.PDName
+
+	manifest := &(pod.DesiredState.Manifest)
+	for ix := range manifest.Volumes {
+		if manifest.Volumes[ix].Source.GCEPersistentDisk != nil &&
+			manifest.Volumes[ix].Source.GCEPersistentDisk.PDName == pdName {
+			return true
+		}
+	}
+	return false
+}
+
+// NoDiskConflict evaluates whether a pod can fit based on the volumes it requests and those
+// already mounted on the node. Some types of volumes are mounted onto node machines. For now,
+// these mounts are exclusive, so if a volume is already mounted on a node, another pod that
+// uses the same volume cannot be scheduled there. This is GCE-specific for now.
+// TODO: migrate this into some per-volume specific code?
+func NoDiskConflict(pod api.Pod, existingPods []api.Pod, node string) (bool, error) {
+	manifest := &(pod.DesiredState.Manifest)
+	for ix := range manifest.Volumes {
+		for podIx := range existingPods {
+			if isVolumeConflict(manifest.Volumes[ix], &existingPods[podIx]) {
+				return false, nil
+			}
+		}
+	}
+	return true, nil
+}
+
 type ResourceFit struct {
 	info NodeInfo
 }
diff --git a/pkg/scheduler/predicates_test.go b/pkg/scheduler/predicates_test.go
index aa2891f1d66..91a55371f70 100644
--- a/pkg/scheduler/predicates_test.go
+++ b/pkg/scheduler/predicates_test.go
@@ -181,3 +181,56 @@ func TestPodFitsPorts(t *testing.T) {
 		}
 	}
 }
+
+func TestDiskConflicts(t *testing.T) {
+	volState := api.PodState{
+		Manifest: api.ContainerManifest{
+			Volumes: []api.Volume{
+				{
+					Source: &api.VolumeSource{
+						GCEPersistentDisk: &api.GCEPersistentDisk{
+							PDName: "foo",
+						},
+					},
+				},
+			},
+		},
+	}
+	volState2 := api.PodState{
+		Manifest: api.ContainerManifest{
+			Volumes: []api.Volume{
+				{
+					Source: &api.VolumeSource{
+						GCEPersistentDisk: &api.GCEPersistentDisk{
+							PDName: "bar",
+						},
+					},
+				},
+			},
+		},
+	}
+	tests := []struct {
+		pod          api.Pod
+		existingPods []api.Pod
+		isOk         bool
+		test         string
+	}{
+		{api.Pod{}, []api.Pod{}, true, "nothing"},
+		{api.Pod{}, []api.Pod{{DesiredState: volState}}, true, "one state"},
+		{api.Pod{DesiredState: volState}, []api.Pod{{DesiredState: volState}}, false, "same state"},
+		{api.Pod{DesiredState: volState2}, []api.Pod{{DesiredState: volState}}, true, "different state"},
+	}
+
+	for _, test := range tests {
+		ok, err := NoDiskConflict(test.pod, test.existingPods, "machine")
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if test.isOk && !ok {
+			t.Errorf("expected ok, got none. %v %v %s", test.pod, test.existingPods, test.test)
+		}
+		if !test.isOk && ok {
+			t.Errorf("expected no ok, got one. %v %v %s", test.pod, test.existingPods, test.test)
+		}
+	}
+}
diff --git a/plugin/pkg/scheduler/factory/factory.go b/plugin/pkg/scheduler/factory/factory.go
index 91948c6c785..81d413ff385 100644
--- a/plugin/pkg/scheduler/factory/factory.go
+++ b/plugin/pkg/scheduler/factory/factory.go
@@ -72,6 +72,8 @@ func (factory *ConfigFactory) Create() *scheduler.Config {
 			algorithm.PodFitsPorts,
 			// Fit is determined by resource availability
 			algorithm.NewResourceFitPredicate(minionLister),
+			// Fit is determined by non-conflicting disk volumes
+			algorithm.NoDiskConflict,
 		},
 		// Prioritize nodes by least requested utilization.
 		algorithm.LeastRequestedPriority,