From e46d445045117ec954efc54095b1de299325fcfe Mon Sep 17 00:00:00 2001 From: Janet Kuo Date: Tue, 24 Jan 2017 14:49:35 -0800 Subject: [PATCH] Add unit test for deleting failed daemon pods --- pkg/controller/daemon/daemoncontroller.go | 13 ++++---- .../daemon/daemoncontroller_test.go | 33 +++++++++++++++++++ 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/pkg/controller/daemon/daemoncontroller.go b/pkg/controller/daemon/daemoncontroller.go index f4bb834886d..1d011436a2e 100644 --- a/pkg/controller/daemon/daemoncontroller.go +++ b/pkg/controller/daemon/daemoncontroller.go @@ -475,15 +475,16 @@ func (dsc *DaemonSetsController) manage(ds *extensions.DaemonSet) error { nodesNeedingDaemonPods = append(nodesNeedingDaemonPods, node.Name) case shouldContinueRunning: // If a daemon pod failed, delete it + // If there's no daemon pods left on this node, we will create it in the next sync loop // TODO: handle the case when the daemon pods fail consistently and causes kill-recreate hot loop var daemonPodsRunning []*v1.Pod for i := range daemonPods { - daemon := daemonPods[i] - if daemon.Status.Phase == v1.PodFailed { - glog.V(2).Infof("Found failed daemon pod %s/%s, will try to kill it", daemon.Namespace, daemon.Name) - podsToDelete = append(podsToDelete, daemon.Name) + pod := daemonPods[i] + if pod.Status.Phase == v1.PodFailed { + glog.V(2).Infof("Found failed daemon pod %s/%s on node %s, will try to kill it", pod.Namespace, pod.Name, node.Name) + podsToDelete = append(podsToDelete, pod.Name) } else { - daemonPodsRunning = append(daemonPodsRunning, daemon) + daemonPodsRunning = append(daemonPodsRunning, pod) } } // If daemon pod is supposed to be running on node, but more than 1 daemon pod is running, delete the excess daemon pods. 
@@ -788,7 +789,7 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *exten predicates.ErrTaintsTolerationsNotMatch: return false, false, false, fmt.Errorf("unexpected reason: GeneralPredicates should not return reason %s", reason.GetReason()) default: - glog.V(4).Infof("unknownd predicate failure reason: %s", reason.GetReason()) + glog.V(4).Infof("unknown predicate failure reason: %s", reason.GetReason()) wantToRun, shouldSchedule, shouldContinueRunning = false, false, false emitEvent = true } diff --git a/pkg/controller/daemon/daemoncontroller_test.go b/pkg/controller/daemon/daemoncontroller_test.go index e30c59d5ea8..3cff2c047a9 100644 --- a/pkg/controller/daemon/daemoncontroller_test.go +++ b/pkg/controller/daemon/daemoncontroller_test.go @@ -138,6 +138,14 @@ func addPods(podStore cache.Store, nodeName string, label map[string]string, num } } +func addFailedPods(podStore cache.Store, nodeName string, label map[string]string, number int) { + for i := 0; i < number; i++ { + pod := newPod(fmt.Sprintf("%s-", nodeName), nodeName, label) + pod.Status = v1.PodStatus{Phase: v1.PodFailed} + podStore.Add(pod) + } +} + func newTestController(initialObjects ...runtime.Object) (*DaemonSetsController, *controller.FakePodControl, *fake.Clientset) { clientset := fake.NewSimpleClientset(initialObjects...) informerFactory := informers.NewSharedInformerFactory(clientset, nil, controller.NoResyncPeriodFunc()) @@ -653,6 +661,31 @@ func TestObservedGeneration(t *testing.T) { } } +// DaemonSet controller should kill all failed pods and recreate at most 1 failed pod. 
+func TestDaemonKillFailedPods(t *testing.T) { + tests := []struct { + numFailedPods, numNormalPods, expectedCreates, expectedDeletes int + test string + }{ + {numFailedPods: 0, numNormalPods: 1, expectedCreates: 0, expectedDeletes: 0, test: "normal (do nothing)"}, + {numFailedPods: 0, numNormalPods: 0, expectedCreates: 1, expectedDeletes: 0, test: "no pods (create 1)"}, + {numFailedPods: 1, numNormalPods: 0, expectedCreates: 0, expectedDeletes: 1, test: "1 failed pod (kill 1), 0 normal pod (create 0; will create in the next sync)"}, + {numFailedPods: 1, numNormalPods: 3, expectedCreates: 0, expectedDeletes: 3, test: "1 failed pod (kill 1), 3 normal pods (kill 2)"}, + {numFailedPods: 2, numNormalPods: 1, expectedCreates: 0, expectedDeletes: 2, test: "2 failed pods (kill 2), 1 normal pod"}, + } + + for _, test := range tests { + t.Logf("test case: %s\n", test.test) + manager, podControl, _ := newTestController() + addNodes(manager.nodeStore.Store, 0, 1, nil) + addFailedPods(manager.podStore.Indexer, "node-0", simpleDaemonSetLabel, test.numFailedPods) + addPods(manager.podStore.Indexer, "node-0", simpleDaemonSetLabel, test.numNormalPods) + ds := newDaemonSet("foo") + manager.dsStore.Add(ds) + syncAndValidateDaemonSets(t, manager, ds, podControl, test.expectedCreates, test.expectedDeletes) + } +} + func TestNodeShouldRunDaemonPod(t *testing.T) { cases := []struct { podsOnNode []*v1.Pod