Mirror of https://github.com/k3s-io/kubernetes.git
Merge pull request #105969 from shivanshu1333/feature4/master/105841
Migrated scheduler file `cache.go` to structured logging
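The changes follow the Kubernetes structured-logging migration conventions: printf-style klog.Errorf/Warningf/Infof calls become klog.ErrorS/InfoS taking a constant message plus alternating key/value pairs; object references are wrapped in klog.KObj (for an API object) or klog.KRef (for bare namespace/name strings); and klog.Fatalf, which has no structured counterpart, becomes klog.ErrorS followed by an explicit os.Exit(1), which is why the diff adds the "os" import. A minimal self-contained sketch of the pattern (the pod and node names here are made up for illustration, not taken from the scheduler cache):

package main

import (
	"os"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"
)

func main() {
	klog.InitFlags(nil)

	// Illustrative objects; the real calls operate on pods and node names
	// held in the scheduler cache.
	pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "nginx"}}
	nodeName := "node-1"

	// Before: unstructured, printf-style formatting.
	klog.Errorf("node %v not found when trying to remove pod %v", nodeName, pod.Name)

	// After: an optional error comes first (nil here), then a constant
	// message, then key/value pairs. KObj formats an API object as
	// namespace/name; KRef builds the same reference from plain strings.
	klog.ErrorS(nil, "Node not found when trying to remove pod",
		"node", klog.KRef("", nodeName), "pod", klog.KObj(pod))

	// klog has no FatalS, so Fatalf call sites become ErrorS plus an
	// explicit flush and exit.
	klog.ErrorS(nil, "SchedulerCache is corrupted and can badly affect scheduling decisions")
	klog.Flush()
	os.Exit(1)
}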
Commit 927914dec7

pkg/scheduler/internal/cache/cache.go (vendored): 40 lines changed
@@ -18,6 +18,7 @@ package cache
 
 import (
 	"fmt"
+	"os"
 	"sync"
 	"time"
 
@@ -123,7 +124,7 @@ func newNodeInfoListItem(ni *framework.NodeInfo) *nodeInfoListItem {
 func (cache *schedulerCache) moveNodeInfoToHead(name string) {
 	ni, ok := cache.nodes[name]
 	if !ok {
-		klog.Errorf("No NodeInfo with name %v found in the cache", name)
+		klog.ErrorS(nil, "No node info with given name found in the cache", "node", klog.KRef("", name))
 		return
 	}
 	// if the node info list item is already at the head, we are done.
@@ -151,7 +152,7 @@ func (cache *schedulerCache) moveNodeInfoToHead(name string) {
 func (cache *schedulerCache) removeNodeInfoFromList(name string) {
 	ni, ok := cache.nodes[name]
 	if !ok {
-		klog.Errorf("No NodeInfo with name %v found in the cache", name)
+		klog.ErrorS(nil, "No node info with given name found in the cache", "node", klog.KRef("", name))
 		return
 	}
 
@@ -264,7 +265,7 @@ func (cache *schedulerCache) UpdateSnapshot(nodeSnapshot *Snapshot) error {
 			", trying to recover",
 			len(nodeSnapshot.nodeInfoList), cache.nodeTree.numNodes,
 			len(nodeSnapshot.nodeInfoMap), len(cache.nodes))
-		klog.Error(errMsg)
+		klog.ErrorS(nil, errMsg)
 		// We will try to recover by re-creating the lists for the next scheduling cycle, but still return an
 		// error to surface the problem, the error will likely cause a failure to the current scheduling cycle.
 		cache.updateNodeInfoSnapshotList(nodeSnapshot, true)
@@ -282,7 +283,7 @@ func (cache *schedulerCache) updateNodeInfoSnapshotList(snapshot *Snapshot, upda
 		snapshot.nodeInfoList = make([]*framework.NodeInfo, 0, cache.nodeTree.numNodes)
 		nodesList, err := cache.nodeTree.list()
 		if err != nil {
-			klog.Error(err)
+			klog.ErrorS(err, "Error occurred while retrieving the list of names of the nodes from node tree")
 		}
 		for _, nodeName := range nodesList {
 			if nodeInfo := snapshot.nodeInfoMap[nodeName]; nodeInfo != nil {
@@ -294,7 +295,7 @@ func (cache *schedulerCache) updateNodeInfoSnapshotList(snapshot *Snapshot, upda
 					snapshot.havePodsWithRequiredAntiAffinityNodeInfoList = append(snapshot.havePodsWithRequiredAntiAffinityNodeInfoList, nodeInfo)
 				}
 			} else {
-				klog.Errorf("node %q exist in nodeTree but not in NodeInfoMap, this should not happen.", nodeName)
+				klog.ErrorS(nil, "Node exists in nodeTree but not in NodeInfoMap, this should not happen", "node", klog.KRef("", nodeName))
 			}
 		}
 	} else {
@@ -381,7 +382,7 @@ func (cache *schedulerCache) finishBinding(pod *v1.Pod, now time.Time) error {
 	cache.mu.RLock()
 	defer cache.mu.RUnlock()
 
-	klog.V(5).Infof("Finished binding for pod %v. Can be expired.", key)
+	klog.V(5).InfoS("Finished binding for pod, can be expired", "pod", klog.KObj(pod))
 	currState, ok := cache.podStates[key]
 	if ok && cache.assumedPods.Has(key) {
 		dl := now.Add(cache.ttl)
@@ -447,7 +448,7 @@ func (cache *schedulerCache) updatePod(oldPod, newPod *v1.Pod) error {
 func (cache *schedulerCache) removePod(pod *v1.Pod) error {
 	n, ok := cache.nodes[pod.Spec.NodeName]
 	if !ok {
-		klog.Errorf("node %v not found when trying to remove pod %v", pod.Spec.NodeName, pod.Name)
+		klog.ErrorS(nil, "Node not found when trying to remove pod", "node", klog.KRef("", pod.Spec.NodeName), "pod", klog.KObj(pod))
 		return nil
 	}
 	if err := n.info.RemovePod(pod); err != nil {
@@ -475,10 +476,10 @@ func (cache *schedulerCache) AddPod(pod *v1.Pod) error {
 	case ok && cache.assumedPods.Has(key):
 		if currState.pod.Spec.NodeName != pod.Spec.NodeName {
 			// The pod was added to a different node than it was assumed to.
-			klog.Warningf("Pod %v was assumed to be on %v but got added to %v", key, pod.Spec.NodeName, currState.pod.Spec.NodeName)
+			klog.InfoS("Pod was added to a different node than it was assumed", "pod", klog.KObj(pod), "assumedNode", klog.KRef("", pod.Spec.NodeName), "currentNode", klog.KRef("", currState.pod.Spec.NodeName))
			// Clean this up.
 			if err = cache.removePod(currState.pod); err != nil {
-				klog.Errorf("removing pod error: %v", err)
+				klog.ErrorS(err, "Error occurred while removing pod")
 			}
 			cache.addPod(pod)
 		}
@@ -513,8 +514,9 @@ func (cache *schedulerCache) UpdatePod(oldPod, newPod *v1.Pod) error {
 	// before Update event, in which case the state would change from Assumed to Added.
 	case ok && !cache.assumedPods.Has(key):
 		if currState.pod.Spec.NodeName != newPod.Spec.NodeName {
-			klog.Errorf("Pod %v updated on a different node than previously added to.", key)
-			klog.Fatalf("Schedulercache is corrupted and can badly affect scheduling decisions")
+			klog.ErrorS(nil, "Pod updated on a different node than previously added to", "pod", klog.KObj(oldPod))
+			klog.ErrorS(nil, "SchedulerCache is corrupted and can badly affect scheduling decisions")
+			os.Exit(1)
 		}
 		if err := cache.updatePod(oldPod, newPod); err != nil {
 			return err
@@ -539,11 +541,12 @@ func (cache *schedulerCache) RemovePod(pod *v1.Pod) error {
 	switch {
 	case ok:
 		if currState.pod.Spec.NodeName != pod.Spec.NodeName {
-			klog.Errorf("Pod %v was assumed to be on %v but got added to %v", key, pod.Spec.NodeName, currState.pod.Spec.NodeName)
+			klog.ErrorS(nil, "Pod was added to a different node than it was assumed", "pod", klog.KObj(pod), "assumedNode", klog.KRef("", pod.Spec.NodeName), "currentNode", klog.KRef("", currState.pod.Spec.NodeName))
 			if pod.Spec.NodeName != "" {
 				// An empty NodeName is possible when the scheduler misses a Delete
 				// event and it gets the last known state from the informer cache.
-				klog.Fatalf("Schedulercache is corrupted and can badly affect scheduling decisions")
+				klog.ErrorS(nil, "SchedulerCache is corrupted and can badly affect scheduling decisions")
+				os.Exit(1)
 			}
 		}
 		return cache.expirePod(key, currState)
@@ -723,17 +726,18 @@ func (cache *schedulerCache) cleanupAssumedPods(now time.Time) {
 	for key := range cache.assumedPods {
 		ps, ok := cache.podStates[key]
 		if !ok {
-			klog.Fatal("Key found in assumed set but not in podStates. Potentially a logical error.")
+			klog.ErrorS(nil, "Key found in assumed set but not in podStates, potentially a logical error")
+			os.Exit(1)
 		}
 		if !ps.bindingFinished {
-			klog.V(5).Infof("Couldn't expire cache for pod %v/%v. Binding is still in progress.",
-				ps.pod.Namespace, ps.pod.Name)
+			klog.V(5).InfoS("Could not expire cache for pod as binding is still in progress",
+				"pod", klog.KObj(ps.pod))
 			continue
 		}
 		if now.After(*ps.deadline) {
-			klog.Warningf("Pod %s/%s expired", ps.pod.Namespace, ps.pod.Name)
+			klog.InfoS("Pod expired", "pod", klog.KObj(ps.pod))
 			if err := cache.expirePod(key, ps); err != nil {
-				klog.Errorf("ExpirePod failed for %s: %v", key, err)
+				klog.ErrorS(err, "ExpirePod failed", "pod", klog.KObj(ps.pod))
 			}
 		}
 	}
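With klog's default text output, a structured call renders the constant message in quotes followed by key="value" pairs, and the ObjectRef produced by KObj/KRef prints as namespace/name (just the name when the namespace is empty, as with the node references above). The node-not-found line from this diff would therefore come out roughly like the following; the timestamp, PID, and source location shown are illustrative:

E1125 10:30:00.000000       1 cache.go:451] "Node not found when trying to remove pod" node="node-1" pod="default/nginx"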