From 7074d2fd392b8e7568141ad2057e942fc5c5201c Mon Sep 17 00:00:00 2001 From: Janet Kuo Date: Mon, 30 Jan 2017 19:22:54 -0800 Subject: [PATCH 1/2] Emit events on 'Failed' daemon pods --- pkg/controller/daemon/daemoncontroller.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/controller/daemon/daemoncontroller.go b/pkg/controller/daemon/daemoncontroller.go index 9fdbeb89b05..210ba4f94a3 100644 --- a/pkg/controller/daemon/daemoncontroller.go +++ b/pkg/controller/daemon/daemoncontroller.go @@ -480,7 +480,10 @@ func (dsc *DaemonSetsController) manage(ds *extensions.DaemonSet) error { for i := range daemonPods { pod := daemonPods[i] if pod.Status.Phase == v1.PodFailed { - glog.V(2).Infof("Found failed daemon pod %s/%s on node %s, will try to kill it", pod.Namespace, node.Name, pod.Name) + msg := fmt.Sprintf("Found failed daemon pod %s/%s on node %s, will try to kill it", pod.Namespace, node.Name, pod.Name) + glog.V(2).Infof(msg) + // Emit an event so that it's discoverable to users. + dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, "FailedDaemonPod", msg) podsToDelete = append(podsToDelete, pod.Name) failedPodsObserved++ } else { From f531bf205a01fe3c005dc9945b6caeb8a9db708f Mon Sep 17 00:00:00 2001 From: Janet Kuo Date: Wed, 1 Feb 2017 11:32:03 -0800 Subject: [PATCH 2/2] Address comments --- pkg/controller/daemon/daemoncontroller.go | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/pkg/controller/daemon/daemoncontroller.go b/pkg/controller/daemon/daemoncontroller.go index 210ba4f94a3..69c9d0705c2 100644 --- a/pkg/controller/daemon/daemoncontroller.go +++ b/pkg/controller/daemon/daemoncontroller.go @@ -56,6 +56,14 @@ const ( // If sending a status upate to API server fails, we retry a finite number of times. StatusUpdateRetries = 1 + + // Reasons for DaemonSet events + // SelectingAllReason is added to an event when a DaemonSet selects all Pods. + SelectingAllReason = "SelectingAll" + // FailedPlacementReason is added to an event when a DaemonSet can't schedule a Pod to a specified node. + FailedPlacementReason = "FailedPlacement" + // FailedDaemonPodReason is added to an event when the status of a Pod of a DaemonSet is 'Failed'. + FailedDaemonPodReason = "FailedDaemonPod" ) // DaemonSetsController is responsible for synchronizing DaemonSet objects stored @@ -479,11 +487,15 @@ func (dsc *DaemonSetsController) manage(ds *extensions.DaemonSet) error { var daemonPodsRunning []*v1.Pod for i := range daemonPods { pod := daemonPods[i] + // Skip terminating pods. We won't delete them again or count them as running daemon pods. + if pod.DeletionTimestamp != nil { + continue + } if pod.Status.Phase == v1.PodFailed { msg := fmt.Sprintf("Found failed daemon pod %s/%s on node %s, will try to kill it", pod.Namespace, node.Name, pod.Name) glog.V(2).Infof(msg) // Emit an event so that it's discoverable to users. - dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, "FailedDaemonPod", msg) + dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, FailedDaemonPodReason, msg) podsToDelete = append(podsToDelete, pod.Name) failedPodsObserved++ } else { @@ -676,7 +688,7 @@ func (dsc *DaemonSetsController) syncDaemonSet(key string) error { everything := metav1.LabelSelector{} if reflect.DeepEqual(ds.Spec.Selector, &everything) { - dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, "SelectingAll", "This daemon set is selecting all pods. A non-empty selector is required.") + dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, SelectingAllReason, "This daemon set is selecting all pods. A non-empty selector is required.") return nil } @@ -762,7 +774,7 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *exten glog.V(4).Infof("GeneralPredicates failed on ds '%s/%s' for reason: %v", ds.ObjectMeta.Namespace, ds.ObjectMeta.Name, r.GetReason()) switch reason := r.(type) { case *predicates.InsufficientResourceError: - dsc.eventRecorder.Eventf(ds, v1.EventTypeNormal, "FailedPlacement", "failed to place pod on %q: %s", node.ObjectMeta.Name, reason.Error()) + dsc.eventRecorder.Eventf(ds, v1.EventTypeNormal, FailedPlacementReason, "failed to place pod on %q: %s", node.ObjectMeta.Name, reason.Error()) shouldSchedule = false case *predicates.PredicateFailureError: var emitEvent bool @@ -801,7 +813,7 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *exten emitEvent = true } if emitEvent { - dsc.eventRecorder.Eventf(ds, v1.EventTypeNormal, "FailedPlacement", "failed to place pod on %q: %s", node.ObjectMeta.Name, reason.GetReason()) + dsc.eventRecorder.Eventf(ds, v1.EventTypeNormal, FailedPlacementReason, "failed to place pod on %q: %s", node.ObjectMeta.Name, reason.GetReason()) } } }