Merge pull request #98259 from tanjing2020/taint_manager_log

Migrate scheduler/taint_manager.go to structured logging
This commit is contained in:
Kubernetes Prow Robot 2021-02-04 23:50:51 -08:00 committed by GitHub
commit 6dc0047396
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -25,7 +25,7 @@ import (
"sync" "sync"
"time" "time"
"k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors" apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
@ -103,7 +103,7 @@ func deletePodHandler(c clientset.Interface, emitEventFunc func(types.Namespaced
return func(args *WorkArgs) error { return func(args *WorkArgs) error {
ns := args.NamespacedName.Namespace ns := args.NamespacedName.Namespace
name := args.NamespacedName.Name name := args.NamespacedName.Name
klog.V(0).Infof("NoExecuteTaintManager is deleting Pod: %v", args.NamespacedName.String()) klog.V(0).InfoS("NoExecuteTaintManager is deleting pod", "pod", args.NamespacedName.String())
if emitEventFunc != nil { if emitEventFunc != nil {
emitEventFunc(args.NamespacedName) emitEventFunc(args.NamespacedName)
} }
@ -160,7 +160,7 @@ func NewNoExecuteTaintManager(c clientset.Interface, getPod GetPodFunc, getNode
recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "taint-controller"}) recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "taint-controller"})
eventBroadcaster.StartStructuredLogging(0) eventBroadcaster.StartStructuredLogging(0)
if c != nil { if c != nil {
klog.V(0).Infof("Sending events to api server.") klog.V(0).InfoS("Sending events to api server")
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: c.CoreV1().Events("")}) eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: c.CoreV1().Events("")})
} else { } else {
klog.Fatalf("kubeClient is nil when starting NodeController") klog.Fatalf("kubeClient is nil when starting NodeController")
@ -184,7 +184,7 @@ func NewNoExecuteTaintManager(c clientset.Interface, getPod GetPodFunc, getNode
// Run starts NoExecuteTaintManager which will run in loop until `stopCh` is closed. // Run starts NoExecuteTaintManager which will run in loop until `stopCh` is closed.
func (tc *NoExecuteTaintManager) Run(stopCh <-chan struct{}) { func (tc *NoExecuteTaintManager) Run(stopCh <-chan struct{}) {
klog.V(0).Infof("Starting NoExecuteTaintManager") klog.V(0).InfoS("Starting NoExecuteTaintManager")
for i := 0; i < UpdateWorkerSize; i++ { for i := 0; i < UpdateWorkerSize; i++ {
tc.nodeUpdateChannels = append(tc.nodeUpdateChannels, make(chan nodeUpdateItem, NodeUpdateChannelSize)) tc.nodeUpdateChannels = append(tc.nodeUpdateChannels, make(chan nodeUpdateItem, NodeUpdateChannelSize))
@ -349,7 +349,7 @@ func (tc *NoExecuteTaintManager) processPodOnNode(
} }
allTolerated, usedTolerations := v1helper.GetMatchingTolerations(taints, tolerations) allTolerated, usedTolerations := v1helper.GetMatchingTolerations(taints, tolerations)
if !allTolerated { if !allTolerated {
klog.V(2).Infof("Not all taints are tolerated after update for Pod %v on %v", podNamespacedName.String(), nodeName) klog.V(2).InfoS("Not all taints are tolerated after update for pod on node", "pod", podNamespacedName.String(), "node", nodeName)
// We're canceling scheduled work (if any), as we're going to delete the Pod right away. // We're canceling scheduled work (if any), as we're going to delete the Pod right away.
tc.cancelWorkWithEvent(podNamespacedName) tc.cancelWorkWithEvent(podNamespacedName)
tc.taintEvictionQueue.AddWork(NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), time.Now(), time.Now()) tc.taintEvictionQueue.AddWork(NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), time.Now(), time.Now())
@ -358,7 +358,7 @@ func (tc *NoExecuteTaintManager) processPodOnNode(
minTolerationTime := getMinTolerationTime(usedTolerations) minTolerationTime := getMinTolerationTime(usedTolerations)
// getMinTolerationTime returns negative value to denote infinite toleration. // getMinTolerationTime returns negative value to denote infinite toleration.
if minTolerationTime < 0 { if minTolerationTime < 0 {
klog.V(4).Infof("Current tolerations for %v tolerate forever, cancelling any scheduled deletion.", podNamespacedName.String()) klog.V(4).InfoS("Current tolerations for pod tolerate forever, cancelling any scheduled deletion", "pod", podNamespacedName.String())
tc.cancelWorkWithEvent(podNamespacedName) tc.cancelWorkWithEvent(podNamespacedName)
return return
} }
@ -382,7 +382,7 @@ func (tc *NoExecuteTaintManager) handlePodUpdate(podUpdate podUpdateItem) {
if apierrors.IsNotFound(err) { if apierrors.IsNotFound(err) {
// Delete // Delete
podNamespacedName := types.NamespacedName{Namespace: podUpdate.podNamespace, Name: podUpdate.podName} podNamespacedName := types.NamespacedName{Namespace: podUpdate.podNamespace, Name: podUpdate.podName}
klog.V(4).Infof("Noticed pod deletion: %#v", podNamespacedName) klog.V(4).InfoS("Noticed pod deletion", "pod", podNamespacedName)
tc.cancelWorkWithEvent(podNamespacedName) tc.cancelWorkWithEvent(podNamespacedName)
return return
} }
@ -397,7 +397,7 @@ func (tc *NoExecuteTaintManager) handlePodUpdate(podUpdate podUpdateItem) {
// Create or Update // Create or Update
podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
klog.V(4).Infof("Noticed pod update: %#v", podNamespacedName) klog.V(4).InfoS("Noticed pod update", "pod", podNamespacedName)
nodeName := pod.Spec.NodeName nodeName := pod.Spec.NodeName
if nodeName == "" { if nodeName == "" {
return return
@ -421,7 +421,7 @@ func (tc *NoExecuteTaintManager) handleNodeUpdate(nodeUpdate nodeUpdateItem) {
if err != nil { if err != nil {
if apierrors.IsNotFound(err) { if apierrors.IsNotFound(err) {
// Delete // Delete
klog.V(4).Infof("Noticed node deletion: %#v", nodeUpdate.nodeName) klog.V(4).InfoS("Noticed node deletion", "node", nodeUpdate.nodeName)
tc.taintedNodesLock.Lock() tc.taintedNodesLock.Lock()
defer tc.taintedNodesLock.Unlock() defer tc.taintedNodesLock.Unlock()
delete(tc.taintedNodes, nodeUpdate.nodeName) delete(tc.taintedNodes, nodeUpdate.nodeName)
@ -432,12 +432,12 @@ func (tc *NoExecuteTaintManager) handleNodeUpdate(nodeUpdate nodeUpdateItem) {
} }
// Create or Update // Create or Update
klog.V(4).Infof("Noticed node update: %#v", nodeUpdate) klog.V(4).InfoS("Noticed node update", "node", nodeUpdate)
taints := getNoExecuteTaints(node.Spec.Taints) taints := getNoExecuteTaints(node.Spec.Taints)
func() { func() {
tc.taintedNodesLock.Lock() tc.taintedNodesLock.Lock()
defer tc.taintedNodesLock.Unlock() defer tc.taintedNodesLock.Unlock()
klog.V(4).Infof("Updating known taints on node %v: %v", node.Name, taints) klog.V(4).InfoS("Updating known taints on node", "node", node.Name, "taints", taints)
if len(taints) == 0 { if len(taints) == 0 {
delete(tc.taintedNodes, node.Name) delete(tc.taintedNodes, node.Name)
} else { } else {
@ -450,7 +450,7 @@ func (tc *NoExecuteTaintManager) handleNodeUpdate(nodeUpdate nodeUpdateItem) {
// tc.PodUpdated which will use tc.taintedNodes to potentially delete delayed pods. // tc.PodUpdated which will use tc.taintedNodes to potentially delete delayed pods.
pods, err := tc.getPodsAssignedToNode(node.Name) pods, err := tc.getPodsAssignedToNode(node.Name)
if err != nil { if err != nil {
klog.Errorf(err.Error()) klog.ErrorS(err, "Failed to get pods assigned to node", "node", node.Name)
return return
} }
if len(pods) == 0 { if len(pods) == 0 {
@ -458,7 +458,7 @@ func (tc *NoExecuteTaintManager) handleNodeUpdate(nodeUpdate nodeUpdateItem) {
} }
// Short circuit, to make this controller a bit faster. // Short circuit, to make this controller a bit faster.
if len(taints) == 0 { if len(taints) == 0 {
klog.V(4).Infof("All taints were removed from the Node %v. Cancelling all evictions...", node.Name) klog.V(4).InfoS("All taints were removed from the node. Cancelling all evictions...", "node", node.Name)
for i := range pods { for i := range pods {
tc.cancelWorkWithEvent(types.NamespacedName{Namespace: pods[i].Namespace, Name: pods[i].Name}) tc.cancelWorkWithEvent(types.NamespacedName{Namespace: pods[i].Namespace, Name: pods[i].Name})
} }