Merge pull request #56622 from wackxu/nodemiss

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

delete a node from its cache if it gets node not found error

**What this PR does / why we need it**:

delete a node from its cache if it gets node not found error

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Fixes # https://github.com/kubernetes/kubernetes/issues/56261

**Special notes for your reviewer**:

**Release note**:

```release-note
NONE
```
This commit is contained in:
Kubernetes Submit Queue 2017-12-12 11:22:12 -08:00 committed by GitHub
commit 7335c41ebe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1139,6 +1139,21 @@ func (factory *configFactory) MakeDefaultErrorFunc(backoff *util.PodBackoff, pod
} else {
if _, ok := err.(*core.FitError); ok {
glog.V(4).Infof("Unable to schedule %v %v: no fit: %v; waiting", pod.Namespace, pod.Name, err)
} else if errors.IsNotFound(err) {
if errStatus, ok := err.(errors.APIStatus); ok && errStatus.Status().Details.Kind == "node" {
nodeName := errStatus.Status().Details.Name
// when node is not found, We do not remove the node right away. Trying again to get
// the node and if the node is still not found, then remove it from the scheduler cache.
_, err := factory.client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
if err != nil && errors.IsNotFound(err) {
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}
factory.schedulerCache.RemoveNode(&node)
// invalidate cached predicate for the node
if factory.enableEquivalenceClassCache {
factory.equivalencePodCache.InvalidateAllCachedPredicateItemOfNode(nodeName)
}
}
}
} else {
glog.Errorf("Error scheduling %v %v: %v; retrying", pod.Namespace, pod.Name, err)
}