mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 11:50:44 +00:00
Merge pull request #50106 from julia-stripe/improve-scheduler-error-handling
Automatic merge from submit-queue Retry scheduling pods after errors more consistently in scheduler **What this PR does / why we need it**: This fixes 2 places in the scheduler where pods can get stuck in Pending forever. In both these places, errors happen and `sched.config.Error` is not called afterwards. This is a problem because `sched.config.Error` is responsible for requeuing pods to retry scheduling when there are issues (see [here](2540b333b2/plugin/pkg/scheduler/factory/factory.go (L958)
)), so if we don't call `sched.config.Error` then the pod will never get scheduled (unless the scheduler is restarted). One of these (where it returns when `ForgetPod` fails instead of continuing and reporting an error) is a regression from [this refactor](https://github.com/kubernetes/kubernetes/commit/ecb962e6585#diff-67f2b61521299ca8d8687b0933bbfb19L234), and with the [old behavior](80f26fa8a8/plugin/pkg/scheduler/scheduler.go (L233-L237)
) the error was reported correctly. As far as I can tell changing the error handling in that refactor wasn't intentional. When AssumePod fails there's never been an error reported but I think adding this will help the scheduler recover when something goes wrong instead of letting pods possibly never get scheduled. This will help prevent issues like https://github.com/kubernetes/kubernetes/issues/49314 in the future. **Release note**: ```release-note Fix incorrect retry logic in scheduler ```
This commit is contained in:
commit
bc7ccfe93b
@ -17,7 +17,6 @@ limitations under the License.
|
|||||||
package scheduler
|
package scheduler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"k8s.io/api/core/v1"
|
"k8s.io/api/core/v1"
|
||||||
@ -194,12 +193,20 @@ func (sched *Scheduler) assume(assumed *v1.Pod, host string) error {
|
|||||||
assumed.Spec.NodeName = host
|
assumed.Spec.NodeName = host
|
||||||
if err := sched.config.SchedulerCache.AssumePod(assumed); err != nil {
|
if err := sched.config.SchedulerCache.AssumePod(assumed); err != nil {
|
||||||
glog.Errorf("scheduler cache AssumePod failed: %v", err)
|
glog.Errorf("scheduler cache AssumePod failed: %v", err)
|
||||||
// TODO: This means that a given pod is already in cache (which means it
|
|
||||||
// is either assumed or already added). This is most probably result of a
|
// This is most probably result of a BUG in retrying logic.
|
||||||
// BUG in retrying logic. As a temporary workaround (which doesn't fully
|
// We report an error here so that pod scheduling can be retried.
|
||||||
// fix the problem, but should reduce its impact), we simply return here,
|
// This relies on the fact that Error will check if the pod has been bound
|
||||||
// as binding doesn't make sense anyway.
|
// to a node and if so will not add it back to the unscheduled pods queue
|
||||||
// This should be fixed properly though.
|
// (otherwise this would cause an infinite loop).
|
||||||
|
sched.config.Error(assumed, err)
|
||||||
|
sched.config.Recorder.Eventf(assumed, v1.EventTypeWarning, "FailedScheduling", "AssumePod failed: %v", err)
|
||||||
|
sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
|
||||||
|
Type: v1.PodScheduled,
|
||||||
|
Status: v1.ConditionFalse,
|
||||||
|
Reason: "SchedulerError",
|
||||||
|
Message: err.Error(),
|
||||||
|
})
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -219,10 +226,13 @@ func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error {
|
|||||||
// If binding succeeded then PodScheduled condition will be updated in apiserver so that
|
// If binding succeeded then PodScheduled condition will be updated in apiserver so that
|
||||||
// it's atomic with setting host.
|
// it's atomic with setting host.
|
||||||
err := sched.config.Binder.Bind(b)
|
err := sched.config.Binder.Bind(b)
|
||||||
|
if err := sched.config.SchedulerCache.FinishBinding(assumed); err != nil {
|
||||||
|
glog.Errorf("scheduler cache FinishBinding failed: %v", err)
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.V(1).Infof("Failed to bind pod: %v/%v", assumed.Namespace, assumed.Name)
|
glog.V(1).Infof("Failed to bind pod: %v/%v", assumed.Namespace, assumed.Name)
|
||||||
if err := sched.config.SchedulerCache.ForgetPod(assumed); err != nil {
|
if err := sched.config.SchedulerCache.ForgetPod(assumed); err != nil {
|
||||||
return fmt.Errorf("scheduler cache ForgetPod failed: %v", err)
|
glog.Errorf("scheduler cache ForgetPod failed: %v", err)
|
||||||
}
|
}
|
||||||
sched.config.Error(assumed, err)
|
sched.config.Error(assumed, err)
|
||||||
sched.config.Recorder.Eventf(assumed, v1.EventTypeWarning, "FailedScheduling", "Binding rejected: %v", err)
|
sched.config.Recorder.Eventf(assumed, v1.EventTypeWarning, "FailedScheduling", "Binding rejected: %v", err)
|
||||||
@ -234,10 +244,6 @@ func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := sched.config.SchedulerCache.FinishBinding(assumed); err != nil {
|
|
||||||
return fmt.Errorf("scheduler cache FinishBinding failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
|
metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
|
||||||
sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v to %v", assumed.Name, b.Target.Name)
|
sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v to %v", assumed.Name, b.Target.Name)
|
||||||
return nil
|
return nil
|
||||||
|
Loading…
Reference in New Issue
Block a user