From bb31040f065d77ea31c2fb6a685c5840e32038a3 Mon Sep 17 00:00:00 2001 From: Shivanshu Raj Shrivastava Date: Sun, 7 Nov 2021 19:19:14 +0530 Subject: [PATCH] migrate cache.go to structured logging --- pkg/scheduler/internal/cache/cache.go | 40 +++++++++++++++------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/pkg/scheduler/internal/cache/cache.go b/pkg/scheduler/internal/cache/cache.go index 6c125d08d74..9519cc85e45 100644 --- a/pkg/scheduler/internal/cache/cache.go +++ b/pkg/scheduler/internal/cache/cache.go @@ -18,6 +18,7 @@ package cache import ( "fmt" + "os" "sync" "time" @@ -123,7 +124,7 @@ func newNodeInfoListItem(ni *framework.NodeInfo) *nodeInfoListItem { func (cache *schedulerCache) moveNodeInfoToHead(name string) { ni, ok := cache.nodes[name] if !ok { - klog.Errorf("No NodeInfo with name %v found in the cache", name) + klog.ErrorS(nil, "No node info with given name found in the cache", "node", klog.KRef("", name)) return } // if the node info list item is already at the head, we are done. @@ -151,7 +152,7 @@ func (cache *schedulerCache) moveNodeInfoToHead(name string) { func (cache *schedulerCache) removeNodeInfoFromList(name string) { ni, ok := cache.nodes[name] if !ok { - klog.Errorf("No NodeInfo with name %v found in the cache", name) + klog.ErrorS(nil, "No node info with given name found in the cache", "node", klog.KRef("", name)) return } @@ -264,7 +265,7 @@ func (cache *schedulerCache) UpdateSnapshot(nodeSnapshot *Snapshot) error { ", trying to recover", len(nodeSnapshot.nodeInfoList), cache.nodeTree.numNodes, len(nodeSnapshot.nodeInfoMap), len(cache.nodes)) - klog.Error(errMsg) + klog.ErrorS(nil, errMsg) // We will try to recover by re-creating the lists for the next scheduling cycle, but still return an // error to surface the problem, the error will likely cause a failure to the current scheduling cycle. cache.updateNodeInfoSnapshotList(nodeSnapshot, true) @@ -282,7 +283,7 @@ func (cache *schedulerCache) updateNodeInfoSnapshotList(snapshot *Snapshot, upda snapshot.nodeInfoList = make([]*framework.NodeInfo, 0, cache.nodeTree.numNodes) nodesList, err := cache.nodeTree.list() if err != nil { - klog.Error(err) + klog.ErrorS(err, "Error occurred while retrieving the list of names of the nodes from node tree") } for _, nodeName := range nodesList { if nodeInfo := snapshot.nodeInfoMap[nodeName]; nodeInfo != nil { @@ -294,7 +295,7 @@ func (cache *schedulerCache) updateNodeInfoSnapshotList(snapshot *Snapshot, upda snapshot.havePodsWithRequiredAntiAffinityNodeInfoList = append(snapshot.havePodsWithRequiredAntiAffinityNodeInfoList, nodeInfo) } } else { - klog.Errorf("node %q exist in nodeTree but not in NodeInfoMap, this should not happen.", nodeName) + klog.ErrorS(nil, "Node exists in nodeTree but not in NodeInfoMap, this should not happen", "node", klog.KRef("", nodeName)) } } } else { @@ -381,7 +382,7 @@ func (cache *schedulerCache) finishBinding(pod *v1.Pod, now time.Time) error { cache.mu.RLock() defer cache.mu.RUnlock() - klog.V(5).Infof("Finished binding for pod %v. Can be expired.", key) + klog.V(5).InfoS("Finished binding for pod, can be expired", "pod", klog.KObj(pod)) currState, ok := cache.podStates[key] if ok && cache.assumedPods.Has(key) { dl := now.Add(cache.ttl) @@ -447,7 +448,7 @@ func (cache *schedulerCache) updatePod(oldPod, newPod *v1.Pod) error { func (cache *schedulerCache) removePod(pod *v1.Pod) error { n, ok := cache.nodes[pod.Spec.NodeName] if !ok { - klog.Errorf("node %v not found when trying to remove pod %v", pod.Spec.NodeName, pod.Name) + klog.ErrorS(nil, "Node not found when trying to remove pod", "node", klog.KRef("", pod.Spec.NodeName), "pod", klog.KObj(pod)) return nil } if err := n.info.RemovePod(pod); err != nil { @@ -475,10 +476,10 @@ func (cache *schedulerCache) AddPod(pod *v1.Pod) error { case ok && cache.assumedPods.Has(key): if currState.pod.Spec.NodeName != pod.Spec.NodeName { // The pod was added to a different node than it was assumed to. - klog.Warningf("Pod %v was assumed to be on %v but got added to %v", key, pod.Spec.NodeName, currState.pod.Spec.NodeName) + klog.InfoS("Pod was added to a different node than it was assumed", "pod", klog.KObj(pod), "assumedNode", klog.KRef("", pod.Spec.NodeName), "currentNode", klog.KRef("", currState.pod.Spec.NodeName)) // Clean this up. if err = cache.removePod(currState.pod); err != nil { - klog.Errorf("removing pod error: %v", err) + klog.ErrorS(err, "Error occurred while removing pod") } cache.addPod(pod) } @@ -513,8 +514,9 @@ func (cache *schedulerCache) UpdatePod(oldPod, newPod *v1.Pod) error { // before Update event, in which case the state would change from Assumed to Added. case ok && !cache.assumedPods.Has(key): if currState.pod.Spec.NodeName != newPod.Spec.NodeName { - klog.Errorf("Pod %v updated on a different node than previously added to.", key) - klog.Fatalf("Schedulercache is corrupted and can badly affect scheduling decisions") + klog.ErrorS(nil, "Pod updated on a different node than previously added to", "pod", klog.KObj(oldPod)) + klog.ErrorS(nil, "SchedulerCache is corrupted and can badly affect scheduling decisions") + os.Exit(1) } if err := cache.updatePod(oldPod, newPod); err != nil { return err @@ -539,11 +541,12 @@ func (cache *schedulerCache) RemovePod(pod *v1.Pod) error { switch { case ok: if currState.pod.Spec.NodeName != pod.Spec.NodeName { - klog.Errorf("Pod %v was assumed to be on %v but got added to %v", key, pod.Spec.NodeName, currState.pod.Spec.NodeName) + klog.ErrorS(nil, "Pod was added to a different node than it was assumed", "pod", klog.KObj(pod), "assumedNode", klog.KRef("", pod.Spec.NodeName), "currentNode", klog.KRef("", currState.pod.Spec.NodeName)) if pod.Spec.NodeName != "" { // An empty NodeName is possible when the scheduler misses a Delete // event and it gets the last known state from the informer cache. - klog.Fatalf("Schedulercache is corrupted and can badly affect scheduling decisions") + klog.ErrorS(nil, "SchedulerCache is corrupted and can badly affect scheduling decisions") + os.Exit(1) } } return cache.expirePod(key, currState) @@ -723,17 +726,18 @@ func (cache *schedulerCache) cleanupAssumedPods(now time.Time) { for key := range cache.assumedPods { ps, ok := cache.podStates[key] if !ok { - klog.Fatal("Key found in assumed set but not in podStates. Potentially a logical error.") + klog.ErrorS(nil, "Key found in assumed set but not in podStates, potentially a logical error") + os.Exit(1) } if !ps.bindingFinished { - klog.V(5).Infof("Couldn't expire cache for pod %v/%v. Binding is still in progress.", - ps.pod.Namespace, ps.pod.Name) + klog.V(5).InfoS("Could not expire cache for pod as binding is still in progress", + "pod", klog.KObj(ps.pod)) continue } if now.After(*ps.deadline) { - klog.Warningf("Pod %s/%s expired", ps.pod.Namespace, ps.pod.Name) + klog.InfoS("Pod expired", "pod", klog.KObj(ps.pod)) if err := cache.expirePod(key, ps); err != nil { - klog.Errorf("ExpirePod failed for %s: %v", key, err) + klog.ErrorS(err, "ExpirePod failed", "pod", klog.KObj(ps.pod)) } } }