delete a node from its cache if it gets node not found error

This commit is contained in:
wackxu 2017-11-30 16:46:17 +08:00
parent 160270800f
commit aac60b6cbb

View File

@ -1109,6 +1109,21 @@ func (factory *configFactory) MakeDefaultErrorFunc(backoff *util.PodBackoff, pod
} else {
if _, ok := err.(*core.FitError); ok {
glog.V(4).Infof("Unable to schedule %v %v: no fit: %v; waiting", pod.Namespace, pod.Name, err)
} else if errors.IsNotFound(err) {
if errStatus, ok := err.(errors.APIStatus); ok && errStatus.Status().Details.Kind == "node" {
nodeName := errStatus.Status().Details.Name
// when node is not found, We do not remove the node right away. Trying again to get
// the node and if the node is still not found, then remove it from the scheduler cache.
_, err := factory.client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
if err != nil && errors.IsNotFound(err) {
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}
factory.schedulerCache.RemoveNode(&node)
// invalidate cached predicate for the node
if factory.enableEquivalenceClassCache {
factory.equivalencePodCache.InvalidateAllCachedPredicateItemOfNode(nodeName)
}
}
}
} else {
glog.Errorf("Error scheduling %v %v: %v; retrying", pod.Namespace, pod.Name, err)
}