Merge pull request #50106 from julia-stripe/improve-scheduler-error-handling

Automatic merge from submit-queue

Retry scheduling pods after errors more consistently in scheduler

**What this PR does / why we need it**:

This fixes 2 places in the scheduler where pods can get stuck in Pending forever.  In both these places, errors happen and `sched.config.Error` is not called afterwards. This is a problem because `sched.config.Error` is responsible for requeuing pods to retry scheduling when there are issues (see [here](2540b333b2/plugin/pkg/scheduler/factory/factory.go (L958))), so if we don't call `sched.config.Error` then the pod will never get scheduled (unless the scheduler is restarted).

One of these (where it returns when `ForgetPod` fails instead of continuing and reporting an error) is a regression from [this refactor](https://github.com/kubernetes/kubernetes/commit/ecb962e6585#diff-67f2b61521299ca8d8687b0933bbfb19L234), and with the [old behavior](80f26fa8a8/plugin/pkg/scheduler/scheduler.go (L233-L237)) the error was reported correctly. As far as I can tell changing the error handling in that refactor wasn't intentional.

When AssumePod fails there's never been an error reported but I think adding this will help the scheduler recover when something goes wrong instead of letting pods possibly never get scheduled.

This will help prevent issues like https://github.com/kubernetes/kubernetes/issues/49314 in the future.

**Release note**:

```release-note
Fix incorrect retry logic in scheduler
```
This commit is contained in:
Kubernetes Submit Queue 2017-08-07 01:35:17 -07:00 committed by GitHub
commit bc7ccfe93b

View File

@ -17,7 +17,6 @@ limitations under the License.
package scheduler package scheduler
import ( import (
"fmt"
"time" "time"
"k8s.io/api/core/v1" "k8s.io/api/core/v1"
@ -194,12 +193,20 @@ func (sched *Scheduler) assume(assumed *v1.Pod, host string) error {
assumed.Spec.NodeName = host assumed.Spec.NodeName = host
if err := sched.config.SchedulerCache.AssumePod(assumed); err != nil { if err := sched.config.SchedulerCache.AssumePod(assumed); err != nil {
glog.Errorf("scheduler cache AssumePod failed: %v", err) glog.Errorf("scheduler cache AssumePod failed: %v", err)
// TODO: This means that a given pod is already in cache (which means it
// is either assumed or already added). This is most probably result of a // This is most probably result of a BUG in retrying logic.
// BUG in retrying logic. As a temporary workaround (which doesn't fully // We report an error here so that pod scheduling can be retried.
// fix the problem, but should reduce its impact), we simply return here, // This relies on the fact that Error will check if the pod has been bound
// as binding doesn't make sense anyway. // to a node and if so will not add it back to the unscheduled pods queue
// This should be fixed properly though. // (otherwise this would cause an infinite loop).
sched.config.Error(assumed, err)
sched.config.Recorder.Eventf(assumed, v1.EventTypeWarning, "FailedScheduling", "AssumePod failed: %v", err)
sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
Type: v1.PodScheduled,
Status: v1.ConditionFalse,
Reason: "SchedulerError",
Message: err.Error(),
})
return err return err
} }
@ -219,10 +226,13 @@ func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error {
// If binding succeeded then PodScheduled condition will be updated in apiserver so that // If binding succeeded then PodScheduled condition will be updated in apiserver so that
// it's atomic with setting host. // it's atomic with setting host.
err := sched.config.Binder.Bind(b) err := sched.config.Binder.Bind(b)
if err := sched.config.SchedulerCache.FinishBinding(assumed); err != nil {
glog.Errorf("scheduler cache FinishBinding failed: %v", err)
}
if err != nil { if err != nil {
glog.V(1).Infof("Failed to bind pod: %v/%v", assumed.Namespace, assumed.Name) glog.V(1).Infof("Failed to bind pod: %v/%v", assumed.Namespace, assumed.Name)
if err := sched.config.SchedulerCache.ForgetPod(assumed); err != nil { if err := sched.config.SchedulerCache.ForgetPod(assumed); err != nil {
return fmt.Errorf("scheduler cache ForgetPod failed: %v", err) glog.Errorf("scheduler cache ForgetPod failed: %v", err)
} }
sched.config.Error(assumed, err) sched.config.Error(assumed, err)
sched.config.Recorder.Eventf(assumed, v1.EventTypeWarning, "FailedScheduling", "Binding rejected: %v", err) sched.config.Recorder.Eventf(assumed, v1.EventTypeWarning, "FailedScheduling", "Binding rejected: %v", err)
@ -234,10 +244,6 @@ func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error {
return err return err
} }
if err := sched.config.SchedulerCache.FinishBinding(assumed); err != nil {
return fmt.Errorf("scheduler cache FinishBinding failed: %v", err)
}
metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart)) metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v to %v", assumed.Name, b.Target.Name) sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v to %v", assumed.Name, b.Target.Name)
return nil return nil