From 0ba1b49b42e517dea09a3908edc26da64a462c57 Mon Sep 17 00:00:00 2001 From: Matt Liggett Date: Fri, 8 Jan 2016 13:38:02 -0800 Subject: [PATCH] When a node becomes unreachable, do not evict DaemonSet-managed pods. Part of the graduation requirement for DaemonSet spelled out in #15310. --- pkg/controller/node/nodecontroller.go | 23 +++++++ pkg/controller/node/nodecontroller_test.go | 76 ++++++++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/pkg/controller/node/nodecontroller.go b/pkg/controller/node/nodecontroller.go index d1dca9a1e24..5a426a3319d 100644 --- a/pkg/controller/node/nodecontroller.go +++ b/pkg/controller/node/nodecontroller.go @@ -111,6 +111,9 @@ type NodeController struct { // Node framework and store nodeController *framework.Controller nodeStore cache.StoreToNodeLister + // DaemonSet framework and store + daemonSetController *framework.Controller + daemonSetStore cache.StoreToDaemonSetLister forcefullyDeletePod func(*api.Pod) } @@ -191,6 +194,19 @@ func NewNodeController( controller.NoResyncPeriodFunc(), framework.ResourceEventHandlerFuncs{}, ) + nc.daemonSetStore.Store, nc.daemonSetController = framework.NewInformer( + &cache.ListWatch{ + ListFunc: func(options api.ListOptions) (runtime.Object, error) { + return nc.kubeClient.Extensions().DaemonSets(api.NamespaceAll).List(options) + }, + WatchFunc: func(options api.ListOptions) (watch.Interface, error) { + return nc.kubeClient.Extensions().DaemonSets(api.NamespaceAll).Watch(options) + }, + }, + &extensions.DaemonSet{}, + controller.NoResyncPeriodFunc(), + framework.ResourceEventHandlerFuncs{}, + ) return nc } @@ -198,6 +214,8 @@ func NewNodeController( func (nc *NodeController) Run(period time.Duration) { go nc.nodeController.Run(util.NeverStop) go nc.podController.Run(util.NeverStop) + go nc.daemonSetController.Run(util.NeverStop) + // Incorporate the results of node status pushed from kubelet to master. 
go util.Until(func() { if err := nc.monitorNodeStatus(); err != nil { @@ -752,6 +770,11 @@ func (nc *NodeController) deletePods(nodeName string) (bool, error) { if pod.DeletionGracePeriodSeconds != nil { continue } + // if the pod is managed by a daemonset, ignore it + _, err := nc.daemonSetStore.GetPodDaemonSets(&pod) + if err == nil { // No error means at least one daemonset was found + continue + } glog.V(2).Infof("Starting deletion of pod %v", pod.Name) nc.recorder.Eventf(&pod, api.EventTypeNormal, "NodeControllerEviction", "Marking for deletion Pod %s from Node %s", pod.Name, nodeName) diff --git a/pkg/controller/node/nodecontroller_test.go b/pkg/controller/node/nodecontroller_test.go index 1fdef327f80..644230d3c82 100644 --- a/pkg/controller/node/nodecontroller_test.go +++ b/pkg/controller/node/nodecontroller_test.go @@ -26,6 +26,7 @@ import ( apierrors "k8s.io/kubernetes/pkg/api/errors" "k8s.io/kubernetes/pkg/api/resource" "k8s.io/kubernetes/pkg/api/unversioned" + "k8s.io/kubernetes/pkg/apis/extensions" "k8s.io/kubernetes/pkg/client/cache" client "k8s.io/kubernetes/pkg/client/unversioned" "k8s.io/kubernetes/pkg/client/unversioned/testclient" @@ -144,6 +145,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { table := []struct { fakeNodeHandler *FakeNodeHandler + daemonSets []extensions.DaemonSet timeToPass time.Duration newNodeStatus api.NodeStatus expectedEvictPods bool @@ -162,6 +164,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { }, Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}), }, + daemonSets: nil, timeToPass: 0, newNodeStatus: api.NodeStatus{}, expectedEvictPods: false, @@ -190,6 +193,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { }, Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}), }, + daemonSets: nil, timeToPass: evictionTimeout, newNodeStatus: api.NodeStatus{ Conditions: []api.NodeCondition{ @@ -205,6 +209,72 @@ func 
TestMonitorNodeStatusEvictPods(t *testing.T) { expectedEvictPods: false, description: "Node created long time ago, and kubelet posted NotReady for a short period of time.", }, + // Pod is ds-managed, and kubelet posted NotReady for a long period of time. + { + fakeNodeHandler: &FakeNodeHandler{ + Existing: []*api.Node{ + { + ObjectMeta: api.ObjectMeta{ + Name: "node0", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionFalse, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + }, + Fake: testclient.NewSimpleFake( + &api.PodList{ + Items: []api.Pod{ + { + ObjectMeta: api.ObjectMeta{ + Name: "pod0", + Namespace: "default", + Labels: map[string]string{"daemon": "yes"}, + }, + Spec: api.PodSpec{ + NodeName: "node0", + }, + }, + }, + }, + ), + }, + daemonSets: []extensions.DaemonSet{ + { + ObjectMeta: api.ObjectMeta{ + Name: "ds0", + Namespace: "default", + }, + Spec: extensions.DaemonSetSpec{ + Selector: &extensions.LabelSelector{ + MatchLabels: map[string]string{"daemon": "yes"}, + }, + }, + }, + }, + timeToPass: time.Hour, + newNodeStatus: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionFalse, + // Node status has just been updated, and is NotReady for 1hr. + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 59, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + expectedEvictPods: false, + description: "Pod is ds-managed, and kubelet posted NotReady for a long period of time.", + }, // Node created long time ago, and kubelet posted NotReady for a long period of time. 
{ fakeNodeHandler: &FakeNodeHandler{ @@ -228,6 +298,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { }, Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}), }, + daemonSets: nil, timeToPass: time.Hour, newNodeStatus: api.NodeStatus{ Conditions: []api.NodeCondition{ @@ -266,6 +337,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { }, Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}), }, + daemonSets: nil, timeToPass: evictionTimeout - testNodeMonitorGracePeriod, newNodeStatus: api.NodeStatus{ Conditions: []api.NodeCondition{ @@ -304,6 +376,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { }, Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}), }, + daemonSets: nil, timeToPass: 60 * time.Minute, newNodeStatus: api.NodeStatus{ Conditions: []api.NodeCondition{ @@ -326,6 +399,9 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { evictionTimeout, util.NewFakeRateLimiter(), util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false) nodeController.now = func() unversioned.Time { return fakeNow } + for i := range item.daemonSets { + nodeController.daemonSetStore.Add(&item.daemonSets[i]) + } if err := nodeController.monitorNodeStatus(); err != nil { t.Errorf("unexpected error: %v", err) }