From df0ade56c4894208133963d816cb66630b6c7413 Mon Sep 17 00:00:00 2001 From: Cong Liu Date: Tue, 30 Jul 2019 13:20:16 -0400 Subject: [PATCH] Lower verbosity level for some useful scheduler logs; Also add node resource info when pod is scheduled on node. --- pkg/scheduler/factory/factory.go | 9 ++++++--- pkg/scheduler/internal/cache/cache.go | 4 +++- pkg/scheduler/internal/cache/node_tree.go | 10 +++++----- pkg/scheduler/scheduler.go | 19 ++++++++++++++++++- 4 files changed, 32 insertions(+), 10 deletions(-) diff --git a/pkg/scheduler/factory/factory.go b/pkg/scheduler/factory/factory.go index b745f0f881f..8f3677000e5 100644 --- a/pkg/scheduler/factory/factory.go +++ b/pkg/scheduler/factory/factory.go @@ -653,11 +653,12 @@ func NewPodInformer(client clientset.Interface, resyncPeriod time.Duration) core func MakeDefaultErrorFunc(client clientset.Interface, podQueue internalqueue.SchedulingQueue, schedulerCache internalcache.Cache, stopEverything <-chan struct{}) func(pod *v1.Pod, err error) { return func(pod *v1.Pod, err error) { if err == core.ErrNoNodesAvailable { - klog.V(4).Infof("Unable to schedule %v/%v: no nodes are registered to the cluster; waiting", pod.Namespace, pod.Name) + klog.V(2).Infof("Unable to schedule %v/%v: no nodes are registered to the cluster; waiting", pod.Namespace, pod.Name) } else { if _, ok := err.(*core.FitError); ok { - klog.V(4).Infof("Unable to schedule %v/%v: no fit: %v; waiting", pod.Namespace, pod.Name, err) + klog.V(2).Infof("Unable to schedule %v/%v: no fit: %v; waiting", pod.Namespace, pod.Name, err) } else if errors.IsNotFound(err) { + klog.V(2).Infof("Unable to schedule %v/%v: possibly due to node not found: %v; waiting", pod.Namespace, pod.Name, err) if errStatus, ok := err.(errors.APIStatus); ok && errStatus.Status().Details.Kind == "node" { nodeName := errStatus.Status().Details.Name // when node is not found, We do not remove the node right away. Trying again to get @@ -665,7 +666,9 @@ func MakeDefaultErrorFunc(client clientset.Interface, podQueue internalqueue.Sch _, err := client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{}) if err != nil && errors.IsNotFound(err) { node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}} - schedulerCache.RemoveNode(&node) + if err := schedulerCache.RemoveNode(&node); err != nil { + klog.V(4).Infof("Node %q is not found; failed to remove it from the cache.", node.Name) + } } } } else { diff --git a/pkg/scheduler/internal/cache/cache.go b/pkg/scheduler/internal/cache/cache.go index 952a6124c7c..11642395fde 100644 --- a/pkg/scheduler/internal/cache/cache.go +++ b/pkg/scheduler/internal/cache/cache.go @@ -565,7 +565,9 @@ func (cache *schedulerCache) RemoveNode(node *v1.Node) error { cache.moveNodeInfoToHead(node.Name) } - cache.nodeTree.RemoveNode(node) + if err := cache.nodeTree.RemoveNode(node); err != nil { + return err + } cache.removeNodeImageStates(node) return nil } diff --git a/pkg/scheduler/internal/cache/node_tree.go b/pkg/scheduler/internal/cache/node_tree.go index 570d6d2f364..de53feef487 100644 --- a/pkg/scheduler/internal/cache/node_tree.go +++ b/pkg/scheduler/internal/cache/node_tree.go @@ -81,7 +81,7 @@ func (nt *NodeTree) addNode(n *v1.Node) { if na, ok := nt.tree[zone]; ok { for _, nodeName := range na.nodes { if nodeName == n.Name { - klog.Warningf("node %v already exist in the NodeTree", n.Name) + klog.Warningf("node %q already exist in the NodeTree", n.Name) return } } @@ -90,7 +90,7 @@ func (nt *NodeTree) addNode(n *v1.Node) { nt.zones = append(nt.zones, zone) nt.tree[zone] = &nodeArray{nodes: []string{n.Name}, lastIndex: 0} } - klog.V(5).Infof("Added node %v in group %v to NodeTree", n.Name, zone) + klog.V(2).Infof("Added node %q in group %q to NodeTree", n.Name, zone) nt.numNodes++ } @@ -110,14 +110,14 @@ func (nt *NodeTree) removeNode(n *v1.Node) error { if len(na.nodes) == 0 { nt.removeZone(zone) } - klog.V(5).Infof("Removed node %v in group %v from NodeTree", n.Name, zone) + klog.V(2).Infof("Removed node %q in group %q from NodeTree", n.Name, zone) nt.numNodes-- return nil } } } - klog.Errorf("Node %v in group %v was not found", n.Name, zone) - return fmt.Errorf("node %v in group %v was not found", n.Name, zone) + klog.Errorf("Node %q in group %q was not found", n.Name, zone) + return fmt.Errorf("node %q in group %q was not found", n.Name, zone) } // removeZone removes a zone from tree. diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 9a9be063aa3..ed13c780f89 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -605,7 +605,12 @@ func (sched *Scheduler) scheduleOne() { fwk.RunUnreservePlugins(pluginContext, assumedPod, scheduleResult.SuggestedHost) sched.recordSchedulingFailure(assumedPod, err, SchedulerError, fmt.Sprintf("Binding rejected: %v", err)) } else { - klog.V(2).Infof("pod %v/%v is bound successfully on node %v, %d nodes evaluated, %d nodes were found feasible", assumedPod.Namespace, assumedPod.Name, scheduleResult.SuggestedHost, scheduleResult.EvaluatedNodes, scheduleResult.FeasibleNodes) + // Calculating nodeResourceString can be heavy. Avoid it if klog verbosity is below 2. + if klog.V(2) { + node, _ := sched.Cache().GetNodeInfo(scheduleResult.SuggestedHost) + klog.Infof("pod %v/%v is bound successfully on node %q, %d nodes evaluated, %d nodes were found feasible. Bound node resource: %q.", assumedPod.Namespace, assumedPod.Name, scheduleResult.SuggestedHost, scheduleResult.EvaluatedNodes, scheduleResult.FeasibleNodes, nodeResourceString(node)) + } + metrics.PodScheduleSuccesses.Inc() // Run "postbind" plugins. @@ -613,3 +618,15 @@ func (sched *Scheduler) scheduleOne() { } }() } + +// nodeResourceString returns a string representation of node resources. +func nodeResourceString(n *v1.Node) string { + if n == nil { + return "N/A" + } + return fmt.Sprintf("Capacity: %s; Allocatable: %s.", resourceString(&n.Status.Capacity), resourceString(&n.Status.Allocatable)) +} + +func resourceString(r *v1.ResourceList) string { + return fmt.Sprintf("CPU<%s>|Memory<%s>|Pods<%s>|StorageEphemeral<%s>", r.Cpu().String(), r.Memory().String(), r.Pods().String(), r.StorageEphemeral().String()) +}