mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-22 11:21:47 +00:00
Merge pull request #50106 from julia-stripe/improve-scheduler-error-handling
Automatic merge from submit-queue

Retry scheduling pods after errors more consistently in scheduler

**What this PR does / why we need it**: This fixes two places in the scheduler where pods can get stuck in Pending forever. In both of these places, an error occurs and `sched.config.Error` is not called afterwards. This is a problem because `sched.config.Error` is responsible for requeuing pods to retry scheduling when there are issues (see [here](https://github.com/kubernetes/kubernetes/blob/2540b333b2/plugin/pkg/scheduler/factory/factory.go#L958)), so if we don't call `sched.config.Error` then the pod will never get scheduled (unless the scheduler is restarted).

One of these (where `bind` returns when `ForgetPod` fails instead of continuing and reporting an error) is a regression from [this refactor](https://github.com/kubernetes/kubernetes/commit/ecb962e6585#diff-67f2b61521299ca8d8687b0933bbfb19L234), and with the [old behavior](https://github.com/kubernetes/kubernetes/blob/80f26fa8a8/plugin/pkg/scheduler/scheduler.go#L233-L237) the error was reported correctly. As far as I can tell, changing the error handling in that refactor wasn't intentional.

When `AssumePod` fails, no error has ever been reported, but I think reporting one will help the scheduler recover when something goes wrong instead of letting pods possibly never get scheduled.

This will help prevent issues like https://github.com/kubernetes/kubernetes/issues/49314 in the future.

**Release note**:
```release-note
Fix incorrect retry logic in scheduler
```
This commit is contained in:
commit
bc7ccfe93b
@ -17,7 +17,6 @@ limitations under the License.
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
@ -194,12 +193,20 @@ func (sched *Scheduler) assume(assumed *v1.Pod, host string) error {
|
||||
assumed.Spec.NodeName = host
|
||||
if err := sched.config.SchedulerCache.AssumePod(assumed); err != nil {
|
||||
glog.Errorf("scheduler cache AssumePod failed: %v", err)
|
||||
// TODO: This means that a given pod is already in cache (which means it
|
||||
// is either assumed or already added). This is most probably result of a
|
||||
// BUG in retrying logic. As a temporary workaround (which doesn't fully
|
||||
// fix the problem, but should reduce its impact), we simply return here,
|
||||
// as binding doesn't make sense anyway.
|
||||
// This should be fixed properly though.
|
||||
|
||||
// This is most probably result of a BUG in retrying logic.
|
||||
// We report an error here so that pod scheduling can be retried.
|
||||
// This relies on the fact that Error will check if the pod has been bound
|
||||
// to a node and if so will not add it back to the unscheduled pods queue
|
||||
// (otherwise this would cause an infinite loop).
|
||||
sched.config.Error(assumed, err)
|
||||
sched.config.Recorder.Eventf(assumed, v1.EventTypeWarning, "FailedScheduling", "AssumePod failed: %v", err)
|
||||
sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
|
||||
Type: v1.PodScheduled,
|
||||
Status: v1.ConditionFalse,
|
||||
Reason: "SchedulerError",
|
||||
Message: err.Error(),
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
@ -219,10 +226,13 @@ func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error {
|
||||
// If binding succeeded then PodScheduled condition will be updated in apiserver so that
|
||||
// it's atomic with setting host.
|
||||
err := sched.config.Binder.Bind(b)
|
||||
if err := sched.config.SchedulerCache.FinishBinding(assumed); err != nil {
|
||||
glog.Errorf("scheduler cache FinishBinding failed: %v", err)
|
||||
}
|
||||
if err != nil {
|
||||
glog.V(1).Infof("Failed to bind pod: %v/%v", assumed.Namespace, assumed.Name)
|
||||
if err := sched.config.SchedulerCache.ForgetPod(assumed); err != nil {
|
||||
return fmt.Errorf("scheduler cache ForgetPod failed: %v", err)
|
||||
glog.Errorf("scheduler cache ForgetPod failed: %v", err)
|
||||
}
|
||||
sched.config.Error(assumed, err)
|
||||
sched.config.Recorder.Eventf(assumed, v1.EventTypeWarning, "FailedScheduling", "Binding rejected: %v", err)
|
||||
@ -234,10 +244,6 @@ func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := sched.config.SchedulerCache.FinishBinding(assumed); err != nil {
|
||||
return fmt.Errorf("scheduler cache FinishBinding failed: %v", err)
|
||||
}
|
||||
|
||||
metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
|
||||
sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v to %v", assumed.Name, b.Target.Name)
|
||||
return nil
|
||||
|
Loading…
Reference in New Issue
Block a user