From aac60b6cbbe330bd6c9ef37bc52ec607fe8f6298 Mon Sep 17 00:00:00 2001 From: wackxu Date: Thu, 30 Nov 2017 16:46:17 +0800 Subject: [PATCH] delete a node from its cache if it gets node not found error --- plugin/pkg/scheduler/factory/factory.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/plugin/pkg/scheduler/factory/factory.go b/plugin/pkg/scheduler/factory/factory.go index 764f449ccc4..a52b8d69fc0 100644 --- a/plugin/pkg/scheduler/factory/factory.go +++ b/plugin/pkg/scheduler/factory/factory.go @@ -1109,6 +1109,21 @@ func (factory *configFactory) MakeDefaultErrorFunc(backoff *util.PodBackoff, pod } else { if _, ok := err.(*core.FitError); ok { glog.V(4).Infof("Unable to schedule %v %v: no fit: %v; waiting", pod.Namespace, pod.Name, err) + } else if errors.IsNotFound(err) { + if errStatus, ok := err.(errors.APIStatus); ok && errStatus.Status().Details.Kind == "node" { + nodeName := errStatus.Status().Details.Name + // when node is not found, We do not remove the node right away. Trying again to get + // the node and if the node is still not found, then remove it from the scheduler cache. + _, err := factory.client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{}) + if err != nil && errors.IsNotFound(err) { + node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}} + factory.schedulerCache.RemoveNode(&node) + // invalidate cached predicate for the node + if factory.enableEquivalenceClassCache { + factory.equivalencePodCache.InvalidateAllCachedPredicateItemOfNode(nodeName) + } + } + } } else { glog.Errorf("Error scheduling %v %v: %v; retrying", pod.Namespace, pod.Name, err) }