Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-27 05:27:21 +00:00
Merge pull request #24197 from wojtek-t/parallel_scheduler
Automatic merge from submit-queue

Asynchronous bindings

This increases scheduler throughput with the "trivial algorithm" (choose a random node) by at least 5x. Such an optimization is necessary if we want to significantly improve scheduling throughput.

Fix #24192

@gmarek @kubernetes/sig-scalability @hongchaodeng
Commit b5b190df2d
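The change below moves the apiserver binding call off the scheduling goroutine: the scheduler optimistically records ("assumes") the pod as placed in its internal cache and issues the binding in a background goroutine, so the next pod can be scheduled immediately, and a failed bind self-repairs when the cache assumption expires. The following is a minimal, self-contained sketch of that pattern, not the real Kubernetes scheduler code; the names (pod, schedulerCache, bind, scheduleOne) are hypothetical stand-ins.

package main

import (
	"fmt"
	"sync"
	"time"
)

// pod is a hypothetical stand-in for the scheduler's pod object.
type pod struct {
	Name     string
	NodeName string
}

// schedulerCache remembers pods that were optimistically assumed to be bound.
// An assumption expires after ttl if the binding is never confirmed, which is
// how a failed background bind self-repairs.
type schedulerCache struct {
	mu      sync.Mutex
	assumed map[string]time.Time
	ttl     time.Duration
}

func (c *schedulerCache) assume(p pod) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.assumed[p.Name] = time.Now().Add(c.ttl)
}

func (c *schedulerCache) isAssumed(name string) bool {
	c.mu.Lock()
	defer c.mu.Unlock()
	expiry, ok := c.assumed[name]
	return ok && time.Now().Before(expiry)
}

// bind stands in for the (potentially slow) apiserver call.
func bind(p pod) error {
	time.Sleep(50 * time.Millisecond)
	return nil
}

// scheduleOne picks a destination, assumes the pod immediately, and binds in
// the background so the caller can move on to the next pod without waiting.
func scheduleOne(c *schedulerCache, p pod, dest string, wg *sync.WaitGroup) {
	p.NodeName = dest
	c.assume(p) // optimistic: recorded before the binding is confirmed

	wg.Add(1)
	go func() {
		defer wg.Done()
		if err := bind(p); err != nil {
			// On failure the cache entry for p simply expires after ttl.
			fmt.Printf("binding %s failed: %v\n", p.Name, err)
		}
	}()
}

func main() {
	cache := &schedulerCache{assumed: map[string]time.Time{}, ttl: 30 * time.Second}
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		scheduleOne(cache, pod{Name: fmt.Sprintf("pod-%d", i)}, "node-1", &wg)
	}
	// The pods are already assumed even though the binds may still be running.
	fmt.Println("pod-0 assumed:", cache.isAssumed("pod-0"))
	wg.Wait()
}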
plugin/pkg/scheduler/scheduler.go

@@ -90,12 +90,27 @@ func (s *Scheduler) scheduleOne() {
 	dest, err := s.config.Algorithm.Schedule(pod, s.config.NodeLister)
 	if err != nil {
 		glog.V(1).Infof("Failed to schedule: %+v", pod)
-		s.config.Recorder.Eventf(pod, api.EventTypeWarning, "FailedScheduling", "%v", err)
 		s.config.Error(pod, err)
+		s.config.Recorder.Eventf(pod, api.EventTypeWarning, "FailedScheduling", "%v", err)
 		return
 	}
 	metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))

+	// Optimistically assume that the binding will succeed and send it to apiserver
+	// in the background.
+	// The only risk in this approach is that if the binding fails because of some
+	// reason, scheduler will be assuming that it succeeded while scheduling next
+	// pods, until the assumption in the internal cache expire (expiration is
+	// defined as "didn't read the binding via watch within a given timeout",
+	// timeout is currently set to 30s). However, after this timeout, the situation
+	// will self-repair.
+	assumed := *pod
+	assumed.Spec.NodeName = dest
+	s.config.SchedulerCache.AssumePodIfBindSucceed(&assumed, func() bool { return true })
+
+	go func() {
+		defer metrics.E2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
+
 	b := &api.Binding{
 		ObjectMeta: api.ObjectMeta{Namespace: pod.Namespace, Name: pod.Name},
 		Target: api.ObjectReference{
@@ -104,25 +119,15 @@ func (s *Scheduler) scheduleOne() {
 		},
 	}

-	bindAction := func() bool {
 	bindingStart := time.Now()
 	err := s.config.Binder.Bind(b)
 	if err != nil {
 		glog.V(1).Infof("Failed to bind pod: %+v", err)
-		s.config.Recorder.Eventf(pod, api.EventTypeNormal, "FailedScheduling", "Binding rejected: %v", err)
 		s.config.Error(pod, err)
-		return false
+		s.config.Recorder.Eventf(pod, api.EventTypeNormal, "FailedScheduling", "Binding rejected: %v", err)
+		return
 	}
 	metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
 	s.config.Recorder.Eventf(pod, api.EventTypeNormal, "Scheduled", "Successfully assigned %v to %v", pod.Name, dest)
-	return true
-	}
-
-	assumed := *pod
-	assumed.Spec.NodeName = dest
-	// We want to assume the pod if and only if the bind succeeds,
-	// but we don't want to race with any deletions, which happen asynchronously.
-	s.config.SchedulerCache.AssumePodIfBindSucceed(&assumed, bindAction)
-
-	metrics.E2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
+	}()
 }
plugin/pkg/scheduler/scheduler_test.go

@@ -101,6 +101,7 @@ func TestScheduler(t *testing.T) {
 		sendPod: podWithID("foo", ""),
 		algo: mockScheduler{"machine1", nil},
 		expectBind: &api.Binding{ObjectMeta: api.ObjectMeta{Name: "foo"}, Target: api.ObjectReference{Kind: "Node", Name: "machine1"}},
+		expectAssumedPod: podWithID("foo", "machine1"),
 		injectBindError: errB,
 		expectError: errB,
 		expectErrorPod: podWithID("foo", ""),
@@ -145,6 +146,7 @@ func TestScheduler(t *testing.T) {
 			close(called)
 		})
 		s.scheduleOne()
+		<-called
 		if e, a := item.expectAssumedPod, gotAssumedPod; !reflect.DeepEqual(e, a) {
 			t.Errorf("%v: assumed pod: wanted %v, got %v", i, e, a)
 		}
@@ -157,7 +159,6 @@ func TestScheduler(t *testing.T) {
 		if e, a := item.expectBind, gotBinding; !reflect.DeepEqual(e, a) {
 			t.Errorf("%v: error: %s", i, diff.ObjectDiff(e, a))
 		}
-		<-called
 		events.Stop()
 	}
 }
@@ -250,6 +251,7 @@ func TestSchedulerForgetAssumedPodAfterDelete(t *testing.T) {
 	// assumedPods: []

 	s.scheduleOne()
+	<-called
 	// queuedPodStore: []
 	// scheduledPodStore: [foo:8080]
 	// assumedPods: [foo:8080]
@@ -271,7 +273,6 @@ func TestSchedulerForgetAssumedPodAfterDelete(t *testing.T) {
 		t.Errorf("Expected exact match on binding: %s", diff.ObjectDiff(ex, ac))
 	}

-	<-called
 	events.Stop()

 	scheduledPodStore.Delete(pod)
@@ -312,6 +313,7 @@ func TestSchedulerForgetAssumedPodAfterDelete(t *testing.T) {
 	})

 	s.scheduleOne()
+	<-called

 	expectBind = &api.Binding{
 		ObjectMeta: api.ObjectMeta{Name: "bar"},
@@ -320,6 +322,5 @@ func TestSchedulerForgetAssumedPodAfterDelete(t *testing.T) {
 	if ex, ac := expectBind, gotBinding; !reflect.DeepEqual(ex, ac) {
 		t.Errorf("Expected exact match on binding: %s", diff.ObjectDiff(ex, ac))
 	}
-	<-called
 	events.Stop()
 }
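In the test changes above, the <-called wait moves to immediately after s.scheduleOne(): the binding (and the event handler that closes called) now runs on a background goroutine, so the test must block on that signal before asserting on the recorded binding, and the old wait at the end of the loop is no longer the right synchronization point. Below is a rough, hypothetical sketch of that synchronization pattern, not the actual test code; fakeBinder and scheduleOne here are illustrative stand-ins.

package sched

import (
	"testing"
	"time"
)

// fakeBinder signals the test by closing a channel when Bind is invoked.
type fakeBinder struct {
	called chan struct{}
}

func (b *fakeBinder) Bind() error {
	close(b.called)
	return nil
}

// scheduleOne mimics the scheduler after this change: the bind runs in a
// goroutine, so the caller returns before the binding has happened.
func scheduleOne(b *fakeBinder) {
	go func() { _ = b.Bind() }()
}

func TestWaitsForAsyncBind(t *testing.T) {
	b := &fakeBinder{called: make(chan struct{})}
	scheduleOne(b)
	select {
	case <-b.called:
		// Binding observed; it is now safe to assert on its side effects.
	case <-time.After(time.Second):
		t.Fatal("timed out waiting for the background bind")
	}
}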