Merge pull request #24197 from wojtek-t/parallel_scheduler

Automatic merge from submit-queue

Asynchronous bindings

This increases scheduler throughput with the "trivial algorithm" (choose a random node) by at least 5x.
Such an optimization is necessary if we want to significantly improve scheduling throughput.

Fix #24192

@gmarek @kubernetes/sig-scalability @hongchaodeng
This commit is contained in:
k8s-merge-robot 2016-04-16 04:39:37 -07:00
commit b5b190df2d
2 changed files with 37 additions and 31 deletions

View File

@ -90,12 +90,27 @@ func (s *Scheduler) scheduleOne() {
dest, err := s.config.Algorithm.Schedule(pod, s.config.NodeLister) dest, err := s.config.Algorithm.Schedule(pod, s.config.NodeLister)
if err != nil { if err != nil {
glog.V(1).Infof("Failed to schedule: %+v", pod) glog.V(1).Infof("Failed to schedule: %+v", pod)
s.config.Recorder.Eventf(pod, api.EventTypeWarning, "FailedScheduling", "%v", err)
s.config.Error(pod, err) s.config.Error(pod, err)
s.config.Recorder.Eventf(pod, api.EventTypeWarning, "FailedScheduling", "%v", err)
return return
} }
metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start)) metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
// Optimistically assume that the binding will succeed and send it to apiserver
// in the background.
// The only risk in this approach is that if the binding fails for some
// reason, the scheduler will assume that it succeeded while scheduling the next
// pods, until the assumption in the internal cache expires (expiration is
// defined as "didn't read the binding via watch within a given timeout";
// the timeout is currently set to 30s). However, after this timeout, the situation
// will self-repair.
assumed := *pod
assumed.Spec.NodeName = dest
s.config.SchedulerCache.AssumePodIfBindSucceed(&assumed, func() bool { return true })
go func() {
defer metrics.E2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
b := &api.Binding{ b := &api.Binding{
ObjectMeta: api.ObjectMeta{Namespace: pod.Namespace, Name: pod.Name}, ObjectMeta: api.ObjectMeta{Namespace: pod.Namespace, Name: pod.Name},
Target: api.ObjectReference{ Target: api.ObjectReference{
@ -104,25 +119,15 @@ func (s *Scheduler) scheduleOne() {
}, },
} }
bindAction := func() bool {
bindingStart := time.Now() bindingStart := time.Now()
err := s.config.Binder.Bind(b) err := s.config.Binder.Bind(b)
if err != nil { if err != nil {
glog.V(1).Infof("Failed to bind pod: %+v", err) glog.V(1).Infof("Failed to bind pod: %+v", err)
s.config.Recorder.Eventf(pod, api.EventTypeNormal, "FailedScheduling", "Binding rejected: %v", err)
s.config.Error(pod, err) s.config.Error(pod, err)
return false s.config.Recorder.Eventf(pod, api.EventTypeNormal, "FailedScheduling", "Binding rejected: %v", err)
return
} }
metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart)) metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
s.config.Recorder.Eventf(pod, api.EventTypeNormal, "Scheduled", "Successfully assigned %v to %v", pod.Name, dest) s.config.Recorder.Eventf(pod, api.EventTypeNormal, "Scheduled", "Successfully assigned %v to %v", pod.Name, dest)
return true }()
}
assumed := *pod
assumed.Spec.NodeName = dest
// We want to assume the pod if and only if the bind succeeds,
// but we don't want to race with any deletions, which happen asynchronously.
s.config.SchedulerCache.AssumePodIfBindSucceed(&assumed, bindAction)
metrics.E2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
} }

View File

@ -101,6 +101,7 @@ func TestScheduler(t *testing.T) {
sendPod: podWithID("foo", ""), sendPod: podWithID("foo", ""),
algo: mockScheduler{"machine1", nil}, algo: mockScheduler{"machine1", nil},
expectBind: &api.Binding{ObjectMeta: api.ObjectMeta{Name: "foo"}, Target: api.ObjectReference{Kind: "Node", Name: "machine1"}}, expectBind: &api.Binding{ObjectMeta: api.ObjectMeta{Name: "foo"}, Target: api.ObjectReference{Kind: "Node", Name: "machine1"}},
expectAssumedPod: podWithID("foo", "machine1"),
injectBindError: errB, injectBindError: errB,
expectError: errB, expectError: errB,
expectErrorPod: podWithID("foo", ""), expectErrorPod: podWithID("foo", ""),
@ -145,6 +146,7 @@ func TestScheduler(t *testing.T) {
close(called) close(called)
}) })
s.scheduleOne() s.scheduleOne()
<-called
if e, a := item.expectAssumedPod, gotAssumedPod; !reflect.DeepEqual(e, a) { if e, a := item.expectAssumedPod, gotAssumedPod; !reflect.DeepEqual(e, a) {
t.Errorf("%v: assumed pod: wanted %v, got %v", i, e, a) t.Errorf("%v: assumed pod: wanted %v, got %v", i, e, a)
} }
@ -157,7 +159,6 @@ func TestScheduler(t *testing.T) {
if e, a := item.expectBind, gotBinding; !reflect.DeepEqual(e, a) { if e, a := item.expectBind, gotBinding; !reflect.DeepEqual(e, a) {
t.Errorf("%v: error: %s", i, diff.ObjectDiff(e, a)) t.Errorf("%v: error: %s", i, diff.ObjectDiff(e, a))
} }
<-called
events.Stop() events.Stop()
} }
} }
@ -250,6 +251,7 @@ func TestSchedulerForgetAssumedPodAfterDelete(t *testing.T) {
// assumedPods: [] // assumedPods: []
s.scheduleOne() s.scheduleOne()
<-called
// queuedPodStore: [] // queuedPodStore: []
// scheduledPodStore: [foo:8080] // scheduledPodStore: [foo:8080]
// assumedPods: [foo:8080] // assumedPods: [foo:8080]
@ -271,7 +273,6 @@ func TestSchedulerForgetAssumedPodAfterDelete(t *testing.T) {
t.Errorf("Expected exact match on binding: %s", diff.ObjectDiff(ex, ac)) t.Errorf("Expected exact match on binding: %s", diff.ObjectDiff(ex, ac))
} }
<-called
events.Stop() events.Stop()
scheduledPodStore.Delete(pod) scheduledPodStore.Delete(pod)
@ -312,6 +313,7 @@ func TestSchedulerForgetAssumedPodAfterDelete(t *testing.T) {
}) })
s.scheduleOne() s.scheduleOne()
<-called
expectBind = &api.Binding{ expectBind = &api.Binding{
ObjectMeta: api.ObjectMeta{Name: "bar"}, ObjectMeta: api.ObjectMeta{Name: "bar"},
@ -320,6 +322,5 @@ func TestSchedulerForgetAssumedPodAfterDelete(t *testing.T) {
if ex, ac := expectBind, gotBinding; !reflect.DeepEqual(ex, ac) { if ex, ac := expectBind, gotBinding; !reflect.DeepEqual(ex, ac) {
t.Errorf("Expected exact match on binding: %s", diff.ObjectDiff(ex, ac)) t.Errorf("Expected exact match on binding: %s", diff.ObjectDiff(ex, ac))
} }
<-called
events.Stop() events.Stop()
} }