Merge pull request #105969 from shivanshu1333/feature4/master/105841

Migrated scheduler file `cache.go` to structured logging
This commit is contained in:
Kubernetes Prow Robot 2021-11-09 02:05:39 -08:00 committed by GitHub
commit 927914dec7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -18,6 +18,7 @@ package cache
import (
"fmt"
"os"
"sync"
"time"
@ -123,7 +124,7 @@ func newNodeInfoListItem(ni *framework.NodeInfo) *nodeInfoListItem {
func (cache *schedulerCache) moveNodeInfoToHead(name string) {
ni, ok := cache.nodes[name]
if !ok {
klog.Errorf("No NodeInfo with name %v found in the cache", name)
klog.ErrorS(nil, "No node info with given name found in the cache", "node", klog.KRef("", name))
return
}
// if the node info list item is already at the head, we are done.
@ -151,7 +152,7 @@ func (cache *schedulerCache) moveNodeInfoToHead(name string) {
func (cache *schedulerCache) removeNodeInfoFromList(name string) {
ni, ok := cache.nodes[name]
if !ok {
klog.Errorf("No NodeInfo with name %v found in the cache", name)
klog.ErrorS(nil, "No node info with given name found in the cache", "node", klog.KRef("", name))
return
}
@ -264,7 +265,7 @@ func (cache *schedulerCache) UpdateSnapshot(nodeSnapshot *Snapshot) error {
", trying to recover",
len(nodeSnapshot.nodeInfoList), cache.nodeTree.numNodes,
len(nodeSnapshot.nodeInfoMap), len(cache.nodes))
klog.Error(errMsg)
klog.ErrorS(nil, errMsg)
// We will try to recover by re-creating the lists for the next scheduling cycle, but still return an
// error to surface the problem, the error will likely cause a failure to the current scheduling cycle.
cache.updateNodeInfoSnapshotList(nodeSnapshot, true)
@ -282,7 +283,7 @@ func (cache *schedulerCache) updateNodeInfoSnapshotList(snapshot *Snapshot, upda
snapshot.nodeInfoList = make([]*framework.NodeInfo, 0, cache.nodeTree.numNodes)
nodesList, err := cache.nodeTree.list()
if err != nil {
klog.Error(err)
klog.ErrorS(err, "Error occurred while retrieving the list of names of the nodes from node tree")
}
for _, nodeName := range nodesList {
if nodeInfo := snapshot.nodeInfoMap[nodeName]; nodeInfo != nil {
@ -294,7 +295,7 @@ func (cache *schedulerCache) updateNodeInfoSnapshotList(snapshot *Snapshot, upda
snapshot.havePodsWithRequiredAntiAffinityNodeInfoList = append(snapshot.havePodsWithRequiredAntiAffinityNodeInfoList, nodeInfo)
}
} else {
klog.Errorf("node %q exist in nodeTree but not in NodeInfoMap, this should not happen.", nodeName)
klog.ErrorS(nil, "Node exists in nodeTree but not in NodeInfoMap, this should not happen", "node", klog.KRef("", nodeName))
}
}
} else {
@ -381,7 +382,7 @@ func (cache *schedulerCache) finishBinding(pod *v1.Pod, now time.Time) error {
cache.mu.RLock()
defer cache.mu.RUnlock()
klog.V(5).Infof("Finished binding for pod %v. Can be expired.", key)
klog.V(5).InfoS("Finished binding for pod, can be expired", "pod", klog.KObj(pod))
currState, ok := cache.podStates[key]
if ok && cache.assumedPods.Has(key) {
dl := now.Add(cache.ttl)
@ -447,7 +448,7 @@ func (cache *schedulerCache) updatePod(oldPod, newPod *v1.Pod) error {
func (cache *schedulerCache) removePod(pod *v1.Pod) error {
n, ok := cache.nodes[pod.Spec.NodeName]
if !ok {
klog.Errorf("node %v not found when trying to remove pod %v", pod.Spec.NodeName, pod.Name)
klog.ErrorS(nil, "Node not found when trying to remove pod", "node", klog.KRef("", pod.Spec.NodeName), "pod", klog.KObj(pod))
return nil
}
if err := n.info.RemovePod(pod); err != nil {
@ -475,10 +476,10 @@ func (cache *schedulerCache) AddPod(pod *v1.Pod) error {
case ok && cache.assumedPods.Has(key):
if currState.pod.Spec.NodeName != pod.Spec.NodeName {
// The pod was added to a different node than it was assumed to.
klog.Warningf("Pod %v was assumed to be on %v but got added to %v", key, pod.Spec.NodeName, currState.pod.Spec.NodeName)
klog.InfoS("Pod was added to a different node than it was assumed", "pod", klog.KObj(pod), "assumedNode", klog.KRef("", pod.Spec.NodeName), "currentNode", klog.KRef("", currState.pod.Spec.NodeName))
// Clean this up.
if err = cache.removePod(currState.pod); err != nil {
klog.Errorf("removing pod error: %v", err)
klog.ErrorS(err, "Error occurred while removing pod")
}
cache.addPod(pod)
}
@ -513,8 +514,9 @@ func (cache *schedulerCache) UpdatePod(oldPod, newPod *v1.Pod) error {
// before Update event, in which case the state would change from Assumed to Added.
case ok && !cache.assumedPods.Has(key):
if currState.pod.Spec.NodeName != newPod.Spec.NodeName {
klog.Errorf("Pod %v updated on a different node than previously added to.", key)
klog.Fatalf("Schedulercache is corrupted and can badly affect scheduling decisions")
klog.ErrorS(nil, "Pod updated on a different node than previously added to", "pod", klog.KObj(oldPod))
klog.ErrorS(nil, "SchedulerCache is corrupted and can badly affect scheduling decisions")
os.Exit(1)
}
if err := cache.updatePod(oldPod, newPod); err != nil {
return err
@ -539,11 +541,12 @@ func (cache *schedulerCache) RemovePod(pod *v1.Pod) error {
switch {
case ok:
if currState.pod.Spec.NodeName != pod.Spec.NodeName {
klog.Errorf("Pod %v was assumed to be on %v but got added to %v", key, pod.Spec.NodeName, currState.pod.Spec.NodeName)
klog.ErrorS(nil, "Pod was added to a different node than it was assumed", "pod", klog.KObj(pod), "assumedNode", klog.KRef("", pod.Spec.NodeName), "currentNode", klog.KRef("", currState.pod.Spec.NodeName))
if pod.Spec.NodeName != "" {
// An empty NodeName is possible when the scheduler misses a Delete
// event and it gets the last known state from the informer cache.
klog.Fatalf("Schedulercache is corrupted and can badly affect scheduling decisions")
klog.ErrorS(nil, "SchedulerCache is corrupted and can badly affect scheduling decisions")
os.Exit(1)
}
}
return cache.expirePod(key, currState)
@ -723,17 +726,18 @@ func (cache *schedulerCache) cleanupAssumedPods(now time.Time) {
for key := range cache.assumedPods {
ps, ok := cache.podStates[key]
if !ok {
klog.Fatal("Key found in assumed set but not in podStates. Potentially a logical error.")
klog.ErrorS(nil, "Key found in assumed set but not in podStates, potentially a logical error")
os.Exit(1)
}
if !ps.bindingFinished {
klog.V(5).Infof("Couldn't expire cache for pod %v/%v. Binding is still in progress.",
ps.pod.Namespace, ps.pod.Name)
klog.V(5).InfoS("Could not expire cache for pod as binding is still in progress",
"pod", klog.KObj(ps.pod))
continue
}
if now.After(*ps.deadline) {
klog.Warningf("Pod %s/%s expired", ps.pod.Namespace, ps.pod.Name)
klog.InfoS("Pod expired", "pod", klog.KObj(ps.pod))
if err := cache.expirePod(key, ps); err != nil {
klog.Errorf("ExpirePod failed for %s: %v", key, err)
klog.ErrorS(err, "ExpirePod failed", "pod", klog.KObj(ps.pod))
}
}
}