Add a system modeler to scheduler

So it can try to predict the effect its bindings will have.
This commit is contained in:
Daniel Smith
2015-03-12 17:28:00 -07:00
parent 6319f8a568
commit 992d78a32e
8 changed files with 377 additions and 26 deletions

View File

@@ -41,23 +41,31 @@ type ConfigFactory struct {
Client *client.Client
// queue for pods that need scheduling
PodQueue *cache.FIFO
// a means to list all scheduled pods
PodLister *cache.StoreToPodLister
// a means to list all known scheduled pods.
ScheduledPodLister *cache.StoreToPodLister
// a means to list all known scheduled pods and pods assumed to have been scheduled.
PodLister algorithm.PodLister
// a means to list all minions
NodeLister *cache.StoreToNodeLister
// a means to list all services
ServiceLister *cache.StoreToServiceLister
modeler scheduler.SystemModeler
}
// Initializes the factory.
func NewConfigFactory(client *client.Client) *ConfigFactory {
return &ConfigFactory{
Client: client,
PodQueue: cache.NewFIFO(cache.MetaNamespaceKeyFunc),
PodLister: &cache.StoreToPodLister{cache.NewStore(cache.MetaNamespaceKeyFunc)},
NodeLister: &cache.StoreToNodeLister{cache.NewStore(cache.MetaNamespaceKeyFunc)},
ServiceLister: &cache.StoreToServiceLister{cache.NewStore(cache.MetaNamespaceKeyFunc)},
c := &ConfigFactory{
Client: client,
PodQueue: cache.NewFIFO(cache.MetaNamespaceKeyFunc),
ScheduledPodLister: &cache.StoreToPodLister{cache.NewStore(cache.MetaNamespaceKeyFunc)},
NodeLister: &cache.StoreToNodeLister{cache.NewStore(cache.MetaNamespaceKeyFunc)},
ServiceLister: &cache.StoreToServiceLister{cache.NewStore(cache.MetaNamespaceKeyFunc)},
}
modeler := scheduler.NewSimpleModeler(&cache.StoreToPodLister{c.PodQueue}, c.ScheduledPodLister)
c.modeler = modeler
c.PodLister = modeler.PodLister()
return c
}
// Create creates a scheduler with the default algorithm provider.
@@ -118,7 +126,7 @@ func (f *ConfigFactory) CreateFromKeys(predicateKeys, priorityKeys util.StringSe
// Watch and cache all running pods. Scheduler needs to find all pods
// so it knows where it's safe to place a pod. Cache this locally.
cache.NewReflector(f.createAssignedPodLW(), &api.Pod{}, f.PodLister.Store, 0).Run()
cache.NewReflector(f.createAssignedPodLW(), &api.Pod{}, f.ScheduledPodLister.Store, 0).Run()
// Watch minions.
// Minions may be listed frequently, so provide a local up-to-date cache.
@@ -148,6 +156,7 @@ func (f *ConfigFactory) CreateFromKeys(predicateKeys, priorityKeys util.StringSe
}
return &scheduler.Config{
Modeler: f.modeler,
MinionLister: f.NodeLister,
Algorithm: algo,
Binder: &binder{f.Client},

View File

@@ -0,0 +1,155 @@
/*
Copyright 2015 Google Inc. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"fmt"
"strings"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
algorithm "github.com/GoogleCloudPlatform/kubernetes/pkg/scheduler"
"github.com/golang/glog"
)
var (
_ = SystemModeler(&FakeModeler{})
_ = SystemModeler(&SimpleModeler{})
)
// ExtendedPodLister: SimpleModeler needs to be able to check for a pod's
// existance in addition to listing the pods.
type ExtendedPodLister interface {
algorithm.PodLister
Exists(pod *api.Pod) (bool, error)
}
// FakeModeler implements the SystemModeler interface.
type FakeModeler struct {
AssumePodFunc func(pod *api.Pod)
}
// AssumePod calls the function variable if it is not nil.
func (f *FakeModeler) AssumePod(pod *api.Pod) {
if f.AssumePodFunc != nil {
f.AssumePodFunc(pod)
}
}
// SimpleModeler implements the SystemModeler interface with a timed pod cache.
type SimpleModeler struct {
queuedPods ExtendedPodLister
scheduledPods ExtendedPodLister
// assumedPods holds the pods that we think we've scheduled, but that
// haven't yet shown up in the scheduledPods variable.
// TODO: periodically clear this.
assumedPods *cache.StoreToPodLister
}
// NewSimpleModeler returns a new SimpleModeler.
// queuedPods: a PodLister that will return pods that have not been scheduled yet.
// scheduledPods: a PodLister that will return pods that we know for sure have been scheduled.
func NewSimpleModeler(queuedPods, scheduledPods ExtendedPodLister) *SimpleModeler {
return &SimpleModeler{
queuedPods: queuedPods,
scheduledPods: scheduledPods,
assumedPods: &cache.StoreToPodLister{cache.NewStore(cache.MetaNamespaceKeyFunc)},
}
}
func (s *SimpleModeler) AssumePod(pod *api.Pod) {
s.assumedPods.Add(pod)
}
// Extract names for readable logging.
func podNames(pods []api.Pod) []string {
out := make([]string, len(pods))
for i := range pods {
out[i] = fmt.Sprintf("'%v/%v (%v)'", pods[i].Namespace, pods[i].Name, pods[i].UID)
}
return out
}
func (s *SimpleModeler) listPods(selector labels.Selector) (pods []api.Pod, err error) {
assumed, err := s.assumedPods.List(selector)
if err != nil {
return nil, err
}
// Since the assumed list will be short, just check every one.
// Goal here is to stop making assumptions about a pod once it shows
// up in one of these other lists.
// TODO: there's a possibility that a pod could get deleted at the
// exact wrong time and linger in assumedPods forever. So we
// need go through that periodically and check for deleted
// pods.
for _, pod := range assumed {
qExist, err := s.queuedPods.Exists(&pod)
if err != nil {
return nil, err
}
if qExist {
s.assumedPods.Store.Delete(&pod)
continue
}
sExist, err := s.scheduledPods.Exists(&pod)
if err != nil {
return nil, err
}
if sExist {
s.assumedPods.Store.Delete(&pod)
continue
}
}
scheduled, err := s.scheduledPods.List(selector)
if err != nil {
return nil, err
}
// re-get in case we deleted any.
assumed, err = s.assumedPods.List(selector)
if err != nil {
return nil, err
}
if len(assumed) == 0 {
return scheduled, nil
}
glog.V(2).Infof(
"listing pods: [%v] assumed to exist in addition to %v known pods.",
strings.Join(podNames(assumed), ","),
len(scheduled),
)
return append(scheduled, assumed...), nil
}
// PodLister returns a PodLister that will list pods that we think we have scheduled in
// addition to pods that we know have been scheduled.
func (s *SimpleModeler) PodLister() algorithm.PodLister {
return simpleModelerPods{s}
}
// simpleModelerPods is an adaptor so that SimpleModeler can be a PodLister.
type simpleModelerPods struct {
simpleModeler *SimpleModeler
}
// List returns pods known and assumed to exist.
func (s simpleModelerPods) List(selector labels.Selector) (pods []api.Pod, err error) {
return s.simpleModeler.listPods(selector)
}

View File

@@ -0,0 +1,111 @@
/*
Copyright 2015 Google Inc. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"testing"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
)
type nn struct {
namespace, name string
}
type names []nn
func (ids names) list() []api.Pod {
out := make([]api.Pod, len(ids))
for i, id := range ids {
out[i] = api.Pod{
ObjectMeta: api.ObjectMeta{
Namespace: id.namespace,
Name: id.name,
},
}
}
return out
}
func (ids names) has(pod *api.Pod) bool {
for _, id := range ids {
if pod.Namespace == id.namespace && pod.Name == id.name {
return true
}
}
return false
}
func TestModeler(t *testing.T) {
table := []struct {
queuedPods []api.Pod
scheduledPods []api.Pod
assumedPods []api.Pod
expectPods names
}{
{
queuedPods: names{}.list(),
scheduledPods: names{{"default", "foo"}, {"custom", "foo"}}.list(),
assumedPods: names{{"default", "foo"}}.list(),
expectPods: names{{"default", "foo"}, {"custom", "foo"}},
}, {
queuedPods: names{}.list(),
scheduledPods: names{{"default", "foo"}}.list(),
assumedPods: names{{"default", "foo"}, {"custom", "foo"}}.list(),
expectPods: names{{"default", "foo"}, {"custom", "foo"}},
}, {
queuedPods: names{{"custom", "foo"}}.list(),
scheduledPods: names{{"default", "foo"}}.list(),
assumedPods: names{{"default", "foo"}, {"custom", "foo"}}.list(),
expectPods: names{{"default", "foo"}},
},
}
for _, item := range table {
q := &cache.StoreToPodLister{cache.NewStore(cache.MetaNamespaceKeyFunc)}
for i := range item.queuedPods {
q.Store.Add(&item.queuedPods[i])
}
s := &cache.StoreToPodLister{cache.NewStore(cache.MetaNamespaceKeyFunc)}
for i := range item.scheduledPods {
s.Store.Add(&item.scheduledPods[i])
}
m := NewSimpleModeler(q, s)
for i := range item.assumedPods {
m.AssumePod(&item.assumedPods[i])
}
list, err := m.PodLister().List(labels.Everything())
if err != nil {
t.Errorf("unexpected error: %v", err)
}
found := 0
for _, pod := range list {
if item.expectPods.has(&pod) {
found++
} else {
t.Errorf("found unexpected pod %#v", pod)
}
}
if e, a := item.expectPods, found; len(e) != a {
t.Errorf("Expected pods:\n%+v\nFound pods:\n%v\n", e, list)
}
}
}

View File

@@ -31,6 +31,21 @@ type Binder interface {
Bind(binding *api.Binding) error
}
// SystemModeler can help scheduler produce a model of the system that
// anticipates reality. For example, if scheduler has pods A and B both
// using hostPort 80, when it binds A to machine M it should not bind B
// to machine M in the time when it hasn't observed the binding of A
// take effect yet.
//
// Since the model is only an optimization, it's expected to handle
// any errors itself without sending them back to the scheduler.
type SystemModeler interface {
// AssumePod assumes that the given pod exists in the system.
// The assumtion should last until the system confirms the
// assumtion or disconfirms it.
AssumePod(pod *api.Pod)
}
// Scheduler watches for new unscheduled pods. It attempts to find
// minions that they fit on and writes bindings back to the api server.
type Scheduler struct {
@@ -38,6 +53,9 @@ type Scheduler struct {
}
type Config struct {
// It is expected that changes made via modeler will be observed
// by MinionLister and Algorithm.
Modeler SystemModeler
MinionLister scheduler.MinionLister
Algorithm scheduler.Scheduler
Binder Binder
@@ -93,4 +111,9 @@ func (s *Scheduler) scheduleOne() {
return
}
s.config.Recorder.Eventf(pod, "scheduled", "Successfully assigned %v to %v", pod.Name, dest)
// tell the model to assume that this binding took effect.
assumed := *pod
assumed.Spec.Host = dest
assumed.Status.Host = dest
s.config.Modeler.AssumePod(&assumed)
}

View File

@@ -34,8 +34,16 @@ type fakeBinder struct {
func (fb fakeBinder) Bind(binding *api.Binding) error { return fb.b(binding) }
func podWithID(id string) *api.Pod {
return &api.Pod{ObjectMeta: api.ObjectMeta{Name: id, SelfLink: testapi.SelfLink("pods", id)}}
func podWithID(id, desiredHost string) *api.Pod {
return &api.Pod{
ObjectMeta: api.ObjectMeta{Name: id, SelfLink: testapi.SelfLink("pods", id)},
Spec: api.PodSpec{
Host: desiredHost,
},
Status: api.PodStatus{
Host: desiredHost,
},
}
}
type mockScheduler struct {
@@ -53,32 +61,34 @@ func TestScheduler(t *testing.T) {
errB := errors.New("binder")
table := []struct {
injectBindError error
sendPod *api.Pod
algo scheduler.Scheduler
expectErrorPod *api.Pod
expectError error
expectBind *api.Binding
eventReason string
injectBindError error
sendPod *api.Pod
algo scheduler.Scheduler
expectErrorPod *api.Pod
expectAssumedPod *api.Pod
expectError error
expectBind *api.Binding
eventReason string
}{
{
sendPod: podWithID("foo"),
algo: mockScheduler{"machine1", nil},
expectBind: &api.Binding{ObjectMeta: api.ObjectMeta{Name: "foo"}, Target: api.ObjectReference{Kind: "Node", Name: "machine1"}},
eventReason: "scheduled",
sendPod: podWithID("foo", ""),
algo: mockScheduler{"machine1", nil},
expectBind: &api.Binding{ObjectMeta: api.ObjectMeta{Name: "foo"}, Target: api.ObjectReference{Kind: "Node", Name: "machine1"}},
expectAssumedPod: podWithID("foo", "machine1"),
eventReason: "scheduled",
}, {
sendPod: podWithID("foo"),
sendPod: podWithID("foo", ""),
algo: mockScheduler{"machine1", errS},
expectError: errS,
expectErrorPod: podWithID("foo"),
expectErrorPod: podWithID("foo", ""),
eventReason: "failedScheduling",
}, {
sendPod: podWithID("foo"),
sendPod: podWithID("foo", ""),
algo: mockScheduler{"machine1", nil},
expectBind: &api.Binding{ObjectMeta: api.ObjectMeta{Name: "foo"}, Target: api.ObjectReference{Kind: "Node", Name: "machine1"}},
injectBindError: errB,
expectError: errB,
expectErrorPod: podWithID("foo"),
expectErrorPod: podWithID("foo", ""),
eventReason: "failedScheduling",
},
}
@@ -86,8 +96,14 @@ func TestScheduler(t *testing.T) {
for i, item := range table {
var gotError error
var gotPod *api.Pod
var gotAssumedPod *api.Pod
var gotBinding *api.Binding
c := &Config{
Modeler: &FakeModeler{
AssumePodFunc: func(pod *api.Pod) {
gotAssumedPod = pod
},
},
MinionLister: scheduler.FakeMinionLister(
api.NodeList{Items: []api.Node{{ObjectMeta: api.ObjectMeta{Name: "machine1"}}}},
),
@@ -114,6 +130,9 @@ func TestScheduler(t *testing.T) {
close(called)
})
s.scheduleOne()
if e, a := item.expectAssumedPod, gotAssumedPod; !reflect.DeepEqual(e, a) {
t.Errorf("%v: assumed pod: wanted %v, got %v", i, e, a)
}
if e, a := item.expectErrorPod, gotPod; !reflect.DeepEqual(e, a) {
t.Errorf("%v: error pod: wanted %v, got %v", i, e, a)
}