Merge pull request #86919 from ahg-g/ahg-fix

Fix how we check for node info list consistency
This commit is contained in:
Kubernetes Prow Robot 2020-01-07 14:09:05 -08:00 committed by GitHub
commit 6099a739d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -261,12 +261,12 @@ func (cache *schedulerCache) UpdateNodeInfoSnapshot(nodeSnapshot *nodeinfosnapsh
cache.updateNodeInfoSnapshotList(nodeSnapshot, updateAllLists)
}
if len(nodeSnapshot.NodeInfoList) != len(nodeSnapshot.NodeInfoMap) {
errMsg := fmt.Sprintf("snapshot state is not consistent, length of NodeInfoList=%v not equal to length of NodeInfoMap=%v "+
"length of nodes in cache=%v, length of nodes in tree=%v"+
if len(nodeSnapshot.NodeInfoList) != cache.nodeTree.numNodes {
errMsg := fmt.Sprintf("snapshot state is not consistent, length of NodeInfoList=%v not equal to length of nodes in tree=%v "+
", length of NodeInfoMap=%v, length of nodes in cache=%v"+
", trying to recover",
len(nodeSnapshot.NodeInfoList), len(nodeSnapshot.NodeInfoMap),
len(cache.nodes), cache.nodeTree.numNodes)
len(nodeSnapshot.NodeInfoList), cache.nodeTree.numNodes,
len(nodeSnapshot.NodeInfoMap), len(cache.nodes))
klog.Error(errMsg)
// We will try to recover by re-creating the lists for the next scheduling cycle, but still return an
// error to surface the problem, the error will likely cause a failure to the current scheduling cycle.