From e35d808aa2b19782fd16e770617c75dafac87adc Mon Sep 17 00:00:00 2001 From: Zhen Wang Date: Mon, 1 Oct 2018 11:32:56 -0700 Subject: [PATCH] NodeLifecycleController treats node lease renewal as a heartbeat signal --- cmd/kube-controller-manager/app/core.go | 1 + pkg/controller/nodelifecycle/BUILD | 11 + .../node_lifecycle_controller.go | 78 ++- .../node_lifecycle_controller_test.go | 656 +++++++++++++++++- test/integration/scheduler/taint_test.go | 1 + 5 files changed, 715 insertions(+), 32 deletions(-) diff --git a/cmd/kube-controller-manager/app/core.go b/cmd/kube-controller-manager/app/core.go index b82e902f531..ace08a9818e 100644 --- a/cmd/kube-controller-manager/app/core.go +++ b/cmd/kube-controller-manager/app/core.go @@ -121,6 +121,7 @@ func startNodeIpamController(ctx ControllerContext) (http.Handler, bool, error) func startNodeLifecycleController(ctx ControllerContext) (http.Handler, bool, error) { lifecycleController, err := lifecyclecontroller.NewNodeLifecycleController( + ctx.InformerFactory.Coordination().V1beta1().Leases(), ctx.InformerFactory.Core().V1().Pods(), ctx.InformerFactory.Core().V1().Nodes(), ctx.InformerFactory.Extensions().V1beta1().DaemonSets(), diff --git a/pkg/controller/nodelifecycle/BUILD b/pkg/controller/nodelifecycle/BUILD index 9a7d6eeba28..7f2f4e1673d 100644 --- a/pkg/controller/nodelifecycle/BUILD +++ b/pkg/controller/nodelifecycle/BUILD @@ -13,11 +13,13 @@ go_library( "//pkg/controller:go_default_library", "//pkg/controller/nodelifecycle/scheduler:go_default_library", "//pkg/controller/util/node:go_default_library", + "//pkg/features:go_default_library", "//pkg/scheduler/algorithm:go_default_library", "//pkg/util/metrics:go_default_library", "//pkg/util/node:go_default_library", "//pkg/util/system:go_default_library", "//pkg/util/taints:go_default_library", + "//staging/src/k8s.io/api/coordination/v1beta1:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/equality:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library", @@ -26,10 +28,13 @@ go_library( "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/runtime:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library", + "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library", + "//staging/src/k8s.io/client-go/informers/coordination/v1beta1:go_default_library", "//staging/src/k8s.io/client-go/informers/core/v1:go_default_library", "//staging/src/k8s.io/client-go/informers/extensions/v1beta1:go_default_library", "//staging/src/k8s.io/client-go/kubernetes:go_default_library", "//staging/src/k8s.io/client-go/kubernetes/scheme:go_default_library", + "//staging/src/k8s.io/client-go/listers/coordination/v1beta1:go_default_library", "//staging/src/k8s.io/client-go/listers/core/v1:go_default_library", "//staging/src/k8s.io/client-go/listers/extensions/v1beta1:go_default_library", "//staging/src/k8s.io/client-go/tools/cache:go_default_library", @@ -69,10 +74,12 @@ go_test( "//pkg/controller/nodelifecycle/scheduler:go_default_library", "//pkg/controller/testutil:go_default_library", "//pkg/controller/util/node:go_default_library", + "//pkg/features:go_default_library", "//pkg/kubelet/apis:go_default_library", "//pkg/scheduler/algorithm:go_default_library", "//pkg/util/node:go_default_library", "//pkg/util/taints:go_default_library", + "//staging/src/k8s.io/api/coordination/v1beta1:go_default_library", 
"//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/api/extensions/v1beta1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/api/equality:go_default_library", @@ -81,12 +88,16 @@ go_test( "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library", + "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library", + "//staging/src/k8s.io/apiserver/pkg/util/feature/testing:go_default_library", "//staging/src/k8s.io/client-go/informers:go_default_library", + "//staging/src/k8s.io/client-go/informers/coordination/v1beta1:go_default_library", "//staging/src/k8s.io/client-go/informers/core/v1:go_default_library", "//staging/src/k8s.io/client-go/informers/extensions/v1beta1:go_default_library", "//staging/src/k8s.io/client-go/kubernetes:go_default_library", "//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library", "//staging/src/k8s.io/client-go/testing:go_default_library", "//staging/src/k8s.io/cloud-provider:go_default_library", + "//vendor/k8s.io/utils/pointer:go_default_library", ], ) diff --git a/pkg/controller/nodelifecycle/node_lifecycle_controller.go b/pkg/controller/nodelifecycle/node_lifecycle_controller.go index 854bd8561ed..f64a995cff6 100644 --- a/pkg/controller/nodelifecycle/node_lifecycle_controller.go +++ b/pkg/controller/nodelifecycle/node_lifecycle_controller.go @@ -31,6 +31,7 @@ import ( "github.com/golang/glog" + coordv1beta1 "k8s.io/api/coordination/v1beta1" "k8s.io/api/core/v1" apiequality "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -39,10 +40,13 @@ import ( "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/wait" + utilfeature "k8s.io/apiserver/pkg/util/feature" + coordinformers "k8s.io/client-go/informers/coordination/v1beta1" coreinformers "k8s.io/client-go/informers/core/v1" extensionsinformers "k8s.io/client-go/informers/extensions/v1beta1" clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" + coordlisters "k8s.io/client-go/listers/coordination/v1beta1" corelisters "k8s.io/client-go/listers/core/v1" extensionslisters "k8s.io/client-go/listers/extensions/v1beta1" "k8s.io/client-go/tools/cache" @@ -54,6 +58,7 @@ import ( "k8s.io/kubernetes/pkg/controller" "k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler" nodeutil "k8s.io/kubernetes/pkg/controller/util/node" + "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/scheduler/algorithm" "k8s.io/kubernetes/pkg/util/metrics" utilnode "k8s.io/kubernetes/pkg/util/node" @@ -136,6 +141,7 @@ type nodeHealthData struct { probeTimestamp metav1.Time readyTransitionTimestamp metav1.Time status *v1.NodeStatus + lease *coordv1beta1.Lease } // Controller is the controller that manages node's life cycle. @@ -172,6 +178,8 @@ type Controller struct { daemonSetStore extensionslisters.DaemonSetLister daemonSetInformerSynced cache.InformerSynced + leaseLister coordlisters.LeaseLister + leaseInformerSynced cache.InformerSynced nodeLister corelisters.NodeLister nodeInformerSynced cache.InformerSynced nodeExistsInCloudProvider func(types.NodeName) (bool, error) @@ -190,19 +198,23 @@ type Controller struct { nodeStartupGracePeriod time.Duration // Controller will not proactively sync node health, but will monitor node - // health signal updated from kubelet. 
If it doesn't receive update for this - // amount of time, it will start posting "NodeReady==ConditionUnknown". The - // amount of time before which Controller start evicting pods is controlled - // via flag 'pod-eviction-timeout'. + // health signal updated from kubelet. There are 2 kinds of node healthiness + // signals: NodeStatus and NodeLease. NodeLease signal is generated only when + // NodeLease feature is enabled. If it doesn't receive update for this amount + // of time, it will start posting "NodeReady==ConditionUnknown". The amount of + // time before which Controller start evicting pods is controlled via flag + // 'pod-eviction-timeout'. // Note: be cautious when changing the constant, it must work with - // nodeStatusUpdateFrequency in kubelet. There are several constraints: - // 1. nodeMonitorGracePeriod must be N times more than - // nodeStatusUpdateFrequency, where N means number of retries allowed for - // kubelet to post node health signal. It is pointless to make - // nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since - // there will only be fresh values from Kubelet at an interval of - // nodeStatusUpdateFrequency. The constant must be less than - // podEvictionTimeout. + // nodeStatusUpdateFrequency in kubelet and renewInterval in NodeLease + // controller. The node health signal update frequency is the minimal of the + // two. + // There are several constraints: + // 1. nodeMonitorGracePeriod must be N times more than the node health signal + // update frequency, where N means number of retries allowed for kubelet to + // post node status/lease. It is pointless to make nodeMonitorGracePeriod + // be less than the node health signal update frequency, since there will + // only be fresh values from Kubelet at an interval of node health signal + // update frequency. The constant must be less than podEvictionTimeout. // 2. nodeMonitorGracePeriod can't be too large for user experience - larger // value takes longer for user to see up-to-date node health. nodeMonitorGracePeriod time.Duration @@ -229,7 +241,9 @@ type Controller struct { } // NewNodeLifecycleController returns a new taint controller. -func NewNodeLifecycleController(podInformer coreinformers.PodInformer, +func NewNodeLifecycleController( + leaseInformer coordinformers.LeaseInformer, + podInformer coreinformers.PodInformer, nodeInformer coreinformers.NodeInformer, daemonSetInformer extensionsinformers.DaemonSetInformer, cloud cloudprovider.Interface, @@ -373,6 +387,9 @@ func NewNodeLifecycleController(podInformer coreinformers.PodInformer, }), }) + nc.leaseLister = leaseInformer.Lister() + nc.leaseInformerSynced = leaseInformer.Informer().HasSynced + nc.nodeLister = nodeInformer.Lister() nc.nodeInformerSynced = nodeInformer.Informer().HasSynced @@ -389,7 +406,7 @@ func (nc *Controller) Run(stopCh <-chan struct{}) { glog.Infof("Starting node controller") defer glog.Infof("Shutting down node controller") - if !controller.WaitForCacheSync("taint", stopCh, nc.nodeInformerSynced, nc.podInformerSynced, nc.daemonSetInformerSynced) { + if !controller.WaitForCacheSync("taint", stopCh, nc.leaseInformerSynced, nc.nodeInformerSynced, nc.podInformerSynced, nc.daemonSetInformerSynced) { return } @@ -811,7 +828,7 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node _, currentReadyCondition := v1node.GetNodeCondition(&node.Status, v1.NodeReady) if currentReadyCondition == nil { // If ready condition is nil, then kubelet (or nodecontroller) never posted node status. 
- // A fake ready condition is created, where LastProbeTime and LastTransitionTime is set + // A fake ready condition is created, where LastHeartbeatTime and LastTransitionTime is set // to node.CreationTimestamp to avoid handle the corner case. observedReadyCondition = v1.NodeCondition{ Type: v1.NodeReady, @@ -820,10 +837,14 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node LastTransitionTime: node.CreationTimestamp, } gracePeriod = nc.nodeStartupGracePeriod - nc.nodeHealthMap[node.Name] = &nodeHealthData{ - status: &node.Status, - probeTimestamp: node.CreationTimestamp, - readyTransitionTimestamp: node.CreationTimestamp, + if _, found := nc.nodeHealthMap[node.Name]; found { + nc.nodeHealthMap[node.Name].status = &node.Status + } else { + nc.nodeHealthMap[node.Name] = &nodeHealthData{ + status: &node.Status, + probeTimestamp: node.CreationTimestamp, + readyTransitionTimestamp: node.CreationTimestamp, + } } } else { // If ready condition is not nil, make a copy of it, since we may modify it in place later. @@ -847,8 +868,10 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node // - currently only correct Ready State transition outside of Node Controller is marking it ready by Kubelet, we don't check // if that's the case, but it does not seem necessary. var savedCondition *v1.NodeCondition + var savedLease *coordv1beta1.Lease if found { _, savedCondition = v1node.GetNodeCondition(savedNodeHealth.status, v1.NodeReady) + savedLease = savedNodeHealth.lease } _, observedCondition := v1node.GetNodeCondition(&node.Status, v1.NodeReady) if !found { @@ -894,11 +917,23 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node readyTransitionTimestamp: transitionTime, } } + var observedLease *coordv1beta1.Lease + if utilfeature.DefaultFeatureGate.Enabled(features.NodeLease) { + // Always update the probe time if node lease is renewed. + // Note: If kubelet never posted the node status, but continues renewing the + // heartbeat leases, the node controller will assume the node is healthy and + // take no action. + observedLease, _ = nc.leaseLister.Leases(v1.NamespaceNodeLease).Get(node.Name) + if observedLease != nil && (savedLease == nil || savedLease.Spec.RenewTime.Before(observedLease.Spec.RenewTime)) { + savedNodeHealth.lease = observedLease + savedNodeHealth.probeTimestamp = nc.now() + } + } nc.nodeHealthMap[node.Name] = savedNodeHealth if nc.now().After(savedNodeHealth.probeTimestamp.Add(gracePeriod)) { - // NodeReady condition was last set longer ago than gracePeriod, so update it to Unknown - // (regardless of its current value) in the master. + // NodeReady condition or lease was last set longer ago than gracePeriod, so + // update it to Unknown (regardless of its current value) in the master. 
if currentReadyCondition == nil { glog.V(2).Infof("node %v is never updated by kubelet", node.Name) node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{ @@ -967,6 +1002,7 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node status: &node.Status, probeTimestamp: nc.nodeHealthMap[node.Name].probeTimestamp, readyTransitionTimestamp: nc.now(), + lease: observedLease, } return gracePeriod, observedReadyCondition, currentReadyCondition, nil } diff --git a/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go b/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go index f7467dfa717..645e602cf08 100644 --- a/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go +++ b/pkg/controller/nodelifecycle/node_lifecycle_controller_test.go @@ -22,6 +22,7 @@ import ( "testing" "time" + coordv1beta1 "k8s.io/api/coordination/v1beta1" "k8s.io/api/core/v1" extensions "k8s.io/api/extensions/v1beta1" apiequality "k8s.io/apimachinery/pkg/api/equality" @@ -30,7 +31,10 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/diff" "k8s.io/apimachinery/pkg/util/wait" + utilfeature "k8s.io/apiserver/pkg/util/feature" + utilfeaturetesting "k8s.io/apiserver/pkg/util/feature/testing" "k8s.io/client-go/informers" + coordinformers "k8s.io/client-go/informers/coordination/v1beta1" coreinformers "k8s.io/client-go/informers/core/v1" extensionsinformers "k8s.io/client-go/informers/extensions/v1beta1" clientset "k8s.io/client-go/kubernetes" @@ -42,10 +46,12 @@ import ( "k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler" "k8s.io/kubernetes/pkg/controller/testutil" nodeutil "k8s.io/kubernetes/pkg/controller/util/node" + "k8s.io/kubernetes/pkg/features" kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis" "k8s.io/kubernetes/pkg/scheduler/algorithm" "k8s.io/kubernetes/pkg/util/node" taintutils "k8s.io/kubernetes/pkg/util/taints" + "k8s.io/utils/pointer" ) const ( @@ -61,6 +67,7 @@ func alwaysReady() bool { return true } type nodeLifecycleController struct { *Controller + leaseInformer coordinformers.LeaseInformer nodeInformer coreinformers.NodeInformer daemonSetInformer extensionsinformers.DaemonSetInformer } @@ -86,6 +93,28 @@ func (nc *nodeLifecycleController) doEviction(fakeNodeHandler *testutil.FakeNode return podEvicted } +func createNodeLease(nodeName string, renewTime metav1.MicroTime) *coordv1beta1.Lease { + return &coordv1beta1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + Namespace: v1.NamespaceNodeLease, + }, + Spec: coordv1beta1.LeaseSpec{ + HolderIdentity: pointer.StringPtr(nodeName), + RenewTime: &renewTime, + }, + } +} + +func (nc *nodeLifecycleController) syncLeaseStore(lease *coordv1beta1.Lease) error { + if lease == nil { + return nil + } + newElems := make([]interface{}, 0, 1) + newElems = append(newElems, lease) + return nc.leaseInformer.Informer().GetStore().Replace(newElems, "newRV") +} + func (nc *nodeLifecycleController) syncNodeStore(fakeNodeHandler *testutil.FakeNodeHandler) error { nodes, err := fakeNodeHandler.List(metav1.ListOptions{}) if err != nil { @@ -114,10 +143,12 @@ func newNodeLifecycleControllerFromClient( factory := informers.NewSharedInformerFactory(kubeClient, controller.NoResyncPeriodFunc()) + leaseInformer := factory.Coordination().V1beta1().Leases() nodeInformer := factory.Core().V1().Nodes() daemonSetInformer := factory.Extensions().V1beta1().DaemonSets() nc, err := NewNodeLifecycleController( + leaseInformer, factory.Core().V1().Pods(), nodeInformer, daemonSetInformer, @@ 
-139,11 +170,12 @@ func newNodeLifecycleControllerFromClient( return nil, err } + nc.leaseInformerSynced = alwaysReady nc.podInformerSynced = alwaysReady nc.nodeInformerSynced = alwaysReady nc.daemonSetInformerSynced = alwaysReady - return &nodeLifecycleController{nc, nodeInformer, daemonSetInformer}, nil + return &nodeLifecycleController{nc, leaseInformer, nodeInformer, daemonSetInformer}, nil } func TestMonitorNodeHealthEvictPods(t *testing.T) { @@ -1543,12 +1575,12 @@ func TestCloudProviderNoRateLimit(t *testing.T) { func TestMonitorNodeHealthUpdateStatus(t *testing.T) { fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC) table := []struct { - fakeNodeHandler *testutil.FakeNodeHandler - timeToPass time.Duration - newNodeStatus v1.NodeStatus - expectedEvictPods bool - expectedRequestCount int - expectedNodes []*v1.Node + fakeNodeHandler *testutil.FakeNodeHandler + timeToPass time.Duration + newNodeStatus v1.NodeStatus + expectedRequestCount int + expectedNodes []*v1.Node + expectedPodStatusUpdate bool }{ // Node created long time ago, without status: // Expect Unknown status posted from node controller. @@ -1617,6 +1649,7 @@ func TestMonitorNodeHealthUpdateStatus(t *testing.T) { }, }, }, + expectedPodStatusUpdate: false, // Pod was never scheduled }, // Node created recently, without status. // Expect no action from node controller (within startup grace period). @@ -1632,8 +1665,9 @@ func TestMonitorNodeHealthUpdateStatus(t *testing.T) { }, Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}), }, - expectedRequestCount: 1, // List - expectedNodes: nil, + expectedRequestCount: 1, // List + expectedNodes: nil, + expectedPodStatusUpdate: false, }, // Node created long time ago, with status updated by kubelet exceeds grace period. // Expect Unknown status posted from node controller. @@ -1751,6 +1785,7 @@ func TestMonitorNodeHealthUpdateStatus(t *testing.T) { }, }, }, + expectedPodStatusUpdate: true, }, // Node created long time ago, with status updated recently. // Expect no action from node controller (within monitor grace period). 
@@ -1781,8 +1816,9 @@ func TestMonitorNodeHealthUpdateStatus(t *testing.T) { }, Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}), }, - expectedRequestCount: 1, // List - expectedNodes: nil, + expectedRequestCount: 1, // List + expectedNodes: nil, + expectedPodStatusUpdate: false, }, } @@ -1826,6 +1862,604 @@ func TestMonitorNodeHealthUpdateStatus(t *testing.T) { if len(item.fakeNodeHandler.UpdatedNodeStatuses) > 0 && !apiequality.Semantic.DeepEqual(item.expectedNodes, item.fakeNodeHandler.UpdatedNodeStatuses) { t.Errorf("Case[%d] unexpected nodes: %s", i, diff.ObjectDiff(item.expectedNodes[0], item.fakeNodeHandler.UpdatedNodeStatuses[0])) } + + podStatusUpdated := false + for _, action := range item.fakeNodeHandler.Actions() { + if action.GetVerb() == "update" && action.GetResource().Resource == "pods" && action.GetSubresource() == "status" { + podStatusUpdated = true + } + } + if podStatusUpdated != item.expectedPodStatusUpdate { + t.Errorf("Case[%d] expect pod status updated to be %v, but got %v", i, item.expectedPodStatusUpdate, podStatusUpdated) + } + } +} + +func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) { + defer utilfeaturetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeLease, true)() + + nodeCreationTime := metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC) + fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC) + testcases := []struct { + description string + fakeNodeHandler *testutil.FakeNodeHandler + lease *coordv1beta1.Lease + timeToPass time.Duration + newNodeStatus v1.NodeStatus + newLease *coordv1beta1.Lease + expectedRequestCount int + expectedNodes []*v1.Node + expectedPodStatusUpdate bool + }{ + // Node created recently, without status. Node lease is missing. + // Expect no action from node controller (within startup grace period). + { + description: "Node created recently, without status. Node lease is missing.", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: fakeNow, + }, + }, + }, + Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}), + }, + expectedRequestCount: 1, // List + expectedNodes: nil, + expectedPodStatusUpdate: false, + }, + // Node created recently, without status. Node lease is renewed recently. + // Expect no action from node controller (within startup grace period). + { + description: "Node created recently, without status. Node lease is renewed recently.", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: fakeNow, + }, + }, + }, + Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}), + }, + lease: createNodeLease("node0", metav1.NewMicroTime(fakeNow.Time)), + expectedRequestCount: 1, // List + expectedNodes: nil, + expectedPodStatusUpdate: false, + }, + // Node created long time ago, without status. Node lease is missing. + // Expect Unknown status posted from node controller. + { + description: "Node created long time ago, without status. 
Node lease is missing.", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: nodeCreationTime, + }, + }, + }, + Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}), + }, + expectedRequestCount: 2, // List+Update + expectedNodes: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: nodeCreationTime, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: nodeCreationTime, + LastTransitionTime: fakeNow, + }, + { + Type: v1.NodeOutOfDisk, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: nodeCreationTime, + LastTransitionTime: fakeNow, + }, + { + Type: v1.NodeMemoryPressure, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: nodeCreationTime, + LastTransitionTime: fakeNow, + }, + { + Type: v1.NodeDiskPressure, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: nodeCreationTime, + LastTransitionTime: fakeNow, + }, + { + Type: v1.NodePIDPressure, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: nodeCreationTime, + LastTransitionTime: fakeNow, + }, + }, + }, + }, + }, + expectedPodStatusUpdate: false, // Pod was never scheduled because the node was never ready. + }, + // Node created long time ago, without status. Node lease is renewed recently. + // Expect no action from node controller (within monitor grace period). + { + description: "Node created long time ago, without status. Node lease is renewed recently.", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: nodeCreationTime, + }, + }, + }, + Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}), + }, + lease: createNodeLease("node0", metav1.NewMicroTime(fakeNow.Time)), + timeToPass: time.Hour, + newLease: createNodeLease("node0", metav1.NewMicroTime(fakeNow.Time.Add(time.Hour))), // Lease is renewed after 1 hour. + expectedRequestCount: 2, // List+List + expectedNodes: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: nodeCreationTime, + }, + }, + }, + expectedPodStatusUpdate: false, + }, + // Node created long time ago, without status. Node lease is expired. + // Expect Unknown status posted from node controller. + { + description: "Node created long time ago, without status. Node lease is expired.", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: nodeCreationTime, + }, + }, + }, + Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}), + }, + lease: createNodeLease("node0", metav1.NewMicroTime(fakeNow.Time)), + timeToPass: time.Hour, + newLease: createNodeLease("node0", metav1.NewMicroTime(fakeNow.Time)), // Lease is not renewed after 1 hour. 
+ expectedRequestCount: 3, // List+List+Update + expectedNodes: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: nodeCreationTime, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: nodeCreationTime, + LastTransitionTime: metav1.Time{Time: fakeNow.Add(time.Hour)}, + }, + { + Type: v1.NodeOutOfDisk, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: nodeCreationTime, + LastTransitionTime: metav1.Time{Time: fakeNow.Add(time.Hour)}, + }, + { + Type: v1.NodeMemoryPressure, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: nodeCreationTime, + LastTransitionTime: metav1.Time{Time: fakeNow.Add(time.Hour)}, + }, + { + Type: v1.NodeDiskPressure, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: nodeCreationTime, + LastTransitionTime: metav1.Time{Time: fakeNow.Add(time.Hour)}, + }, + { + Type: v1.NodePIDPressure, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: nodeCreationTime, + LastTransitionTime: metav1.Time{Time: fakeNow.Add(time.Hour)}, + }, + }, + }, + }, + }, + expectedPodStatusUpdate: false, + }, + // Node created long time ago, with status updated by kubelet exceeds grace period. Node lease is renewed. + // Expect no action from node controller (within monitor grace period). + { + description: "Node created long time ago, with status updated by kubelet exceeds grace period. Node lease is renewed.", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: nodeCreationTime, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + LastHeartbeatTime: fakeNow, + LastTransitionTime: fakeNow, + }, + { + Type: v1.NodeOutOfDisk, + Status: v1.ConditionFalse, + LastHeartbeatTime: fakeNow, + LastTransitionTime: fakeNow, + }, + }, + Capacity: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"), + v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"), + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}), + }, + lease: createNodeLease("node0", metav1.NewMicroTime(fakeNow.Time)), + expectedRequestCount: 2, // List+List + timeToPass: time.Hour, + newNodeStatus: v1.NodeStatus{ + // Node status hasn't been updated for 1 hour. + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + LastHeartbeatTime: fakeNow, + LastTransitionTime: fakeNow, + }, + { + Type: v1.NodeOutOfDisk, + Status: v1.ConditionFalse, + LastHeartbeatTime: fakeNow, + LastTransitionTime: fakeNow, + }, + }, + Capacity: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"), + v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"), + }, + }, + newLease: createNodeLease("node0", metav1.NewMicroTime(fakeNow.Time.Add(time.Hour))), // Lease is renewed after 1 hour. 
+ expectedNodes: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: nodeCreationTime, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + LastHeartbeatTime: fakeNow, + LastTransitionTime: fakeNow, + }, + { + Type: v1.NodeOutOfDisk, + Status: v1.ConditionFalse, + LastHeartbeatTime: fakeNow, + LastTransitionTime: fakeNow, + }, + }, + Capacity: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"), + v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"), + }, + }, + }, + }, + expectedPodStatusUpdate: false, + }, + // Node created long time ago, with status updated by kubelet recently. Node lease is expired. + // Expect no action from node controller (within monitor grace period). + { + description: "Node created long time ago, with status updated by kubelet recently. Node lease is expired.", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: nodeCreationTime, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + LastHeartbeatTime: fakeNow, + LastTransitionTime: fakeNow, + }, + { + Type: v1.NodeOutOfDisk, + Status: v1.ConditionFalse, + LastHeartbeatTime: fakeNow, + LastTransitionTime: fakeNow, + }, + }, + Capacity: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"), + v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"), + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}), + }, + lease: createNodeLease("node0", metav1.NewMicroTime(fakeNow.Time)), + expectedRequestCount: 2, // List+List + timeToPass: time.Hour, + newNodeStatus: v1.NodeStatus{ + // Node status is updated after 1 hour. + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + LastHeartbeatTime: metav1.Time{Time: fakeNow.Add(time.Hour)}, + LastTransitionTime: fakeNow, + }, + { + Type: v1.NodeOutOfDisk, + Status: v1.ConditionFalse, + LastHeartbeatTime: metav1.Time{Time: fakeNow.Add(time.Hour)}, + LastTransitionTime: fakeNow, + }, + }, + Capacity: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"), + v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"), + }, + }, + newLease: createNodeLease("node0", metav1.NewMicroTime(fakeNow.Time)), // Lease is not renewed after 1 hour. + expectedNodes: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: nodeCreationTime, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + LastHeartbeatTime: metav1.Time{Time: fakeNow.Add(time.Hour)}, + LastTransitionTime: fakeNow, + }, + { + Type: v1.NodeOutOfDisk, + Status: v1.ConditionFalse, + LastHeartbeatTime: metav1.Time{Time: fakeNow.Add(time.Hour)}, + LastTransitionTime: fakeNow, + }, + }, + Capacity: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"), + v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"), + }, + }, + }, + }, + expectedPodStatusUpdate: false, + }, + // Node created long time ago, with status updated by kubelet exceeds grace period. Node lease is also expired. + // Expect Unknown status posted from node controller. + { + description: "Node created long time ago, with status updated by kubelet exceeds grace period. 
Node lease is also expired.", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: nodeCreationTime, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + LastHeartbeatTime: fakeNow, + LastTransitionTime: fakeNow, + }, + { + Type: v1.NodeOutOfDisk, + Status: v1.ConditionFalse, + LastHeartbeatTime: fakeNow, + LastTransitionTime: fakeNow, + }, + }, + Capacity: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"), + v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"), + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}), + }, + lease: createNodeLease("node0", metav1.NewMicroTime(fakeNow.Time)), + expectedRequestCount: 3, // List+List+Update + timeToPass: time.Hour, + newNodeStatus: v1.NodeStatus{ + // Node status hasn't been updated for 1 hour. + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + LastHeartbeatTime: fakeNow, + LastTransitionTime: fakeNow, + }, + { + Type: v1.NodeOutOfDisk, + Status: v1.ConditionFalse, + LastHeartbeatTime: fakeNow, + LastTransitionTime: fakeNow, + }, + }, + Capacity: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"), + v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"), + }, + }, + newLease: createNodeLease("node0", metav1.NewMicroTime(fakeNow.Time)), // Lease is not renewed after 1 hour. + expectedNodes: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: nodeCreationTime, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionUnknown, + Reason: "NodeStatusUnknown", + Message: "Kubelet stopped posting node status.", + LastHeartbeatTime: fakeNow, + LastTransitionTime: metav1.Time{Time: fakeNow.Add(time.Hour)}, + }, + { + Type: v1.NodeOutOfDisk, + Status: v1.ConditionUnknown, + Reason: "NodeStatusUnknown", + Message: "Kubelet stopped posting node status.", + LastHeartbeatTime: fakeNow, + LastTransitionTime: metav1.Time{Time: fakeNow.Add(time.Hour)}, + }, + { + Type: v1.NodeMemoryPressure, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: nodeCreationTime, // should default to node creation time if condition was never updated + LastTransitionTime: metav1.Time{Time: fakeNow.Add(time.Hour)}, + }, + { + Type: v1.NodeDiskPressure, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: nodeCreationTime, // should default to node creation time if condition was never updated + LastTransitionTime: metav1.Time{Time: fakeNow.Add(time.Hour)}, + }, + { + Type: v1.NodePIDPressure, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: nodeCreationTime, // should default to node creation time if condition was never updated + LastTransitionTime: metav1.Time{Time: fakeNow.Add(time.Hour)}, + }, + }, + Capacity: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"), + v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"), + }, + }, + }, + }, + expectedPodStatusUpdate: true, + }, + } + + for _, item := range testcases { + t.Run(item.description, func(t *testing.T) { + nodeController, _ := 
newNodeLifecycleControllerFromClient( + nil, + item.fakeNodeHandler, + 5*time.Minute, + testRateLimiterQPS, + testRateLimiterQPS, + testLargeClusterThreshold, + testUnhealthyThreshold, + testNodeMonitorGracePeriod, + testNodeStartupGracePeriod, + testNodeMonitorPeriod, + false) + nodeController.now = func() metav1.Time { return fakeNow } + nodeController.recorder = testutil.NewFakeRecorder() + if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if err := nodeController.syncLeaseStore(item.lease); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if err := nodeController.monitorNodeHealth(); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if item.timeToPass > 0 { + nodeController.now = func() metav1.Time { return metav1.Time{Time: fakeNow.Add(item.timeToPass)} } + item.fakeNodeHandler.Existing[0].Status = item.newNodeStatus + if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if err := nodeController.syncLeaseStore(item.newLease); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if err := nodeController.monitorNodeHealth(); err != nil { + t.Fatalf("unexpected error: %v", err) + } + } + if item.expectedRequestCount != item.fakeNodeHandler.RequestCount { + t.Errorf("expected %v call, but got %v.", item.expectedRequestCount, item.fakeNodeHandler.RequestCount) + } + if len(item.fakeNodeHandler.UpdatedNodes) > 0 && !apiequality.Semantic.DeepEqual(item.expectedNodes, item.fakeNodeHandler.UpdatedNodes) { + t.Errorf("unexpected nodes: %s", diff.ObjectDiff(item.expectedNodes[0], item.fakeNodeHandler.UpdatedNodes[0])) + } + if len(item.fakeNodeHandler.UpdatedNodeStatuses) > 0 && !apiequality.Semantic.DeepEqual(item.expectedNodes, item.fakeNodeHandler.UpdatedNodeStatuses) { + t.Errorf("unexpected nodes: %s", diff.ObjectDiff(item.expectedNodes[0], item.fakeNodeHandler.UpdatedNodeStatuses[0])) + } + + podStatusUpdated := false + for _, action := range item.fakeNodeHandler.Actions() { + if action.GetVerb() == "update" && action.GetResource().Resource == "pods" && action.GetSubresource() == "status" { + podStatusUpdated = true + } + } + if podStatusUpdated != item.expectedPodStatusUpdate { + t.Errorf("expect pod status updated to be %v, but got %v", item.expectedPodStatusUpdate, podStatusUpdated) + } + }) } } diff --git a/test/integration/scheduler/taint_test.go b/test/integration/scheduler/taint_test.go index 15e04ca5e2c..58949f6d8fb 100644 --- a/test/integration/scheduler/taint_test.go +++ b/test/integration/scheduler/taint_test.go @@ -99,6 +99,7 @@ func TestTaintNodeByCondition(t *testing.T) { // Start NodeLifecycleController for taint. nc, err := nodelifecycle.NewNodeLifecycleController( + informers.Coordination().V1beta1().Leases(), informers.Core().V1().Pods(), informers.Core().V1().Nodes(), informers.Extensions().V1beta1().DaemonSets(),
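
---
Reading aid, not part of the patch: a minimal, self-contained sketch of the heartbeat decision the patched tryUpdateNodeHealth makes once lease renewals count as health signals. The lease and nodeHealth types below are simplified stand-ins for coordv1beta1.Lease and the controller's nodeHealthData, and the durations are arbitrary; this is an illustration of the logic, not the controller's actual code.

// lease_heartbeat_sketch.go — illustrative only.
package main

import (
	"fmt"
	"time"
)

// lease is a simplified stand-in for a coordination/v1beta1 Lease: all we
// care about here is when it was last renewed.
type lease struct {
	renewTime time.Time
}

// nodeHealth mirrors the controller's per-node bookkeeping: the last time any
// health signal (node status update or lease renewal) was observed.
type nodeHealth struct {
	probeTimestamp time.Time
	lease          *lease
}

// observeLease advances the probe timestamp whenever a newer lease renewal is
// seen, which is the core of the change to tryUpdateNodeHealth.
func (h *nodeHealth) observeLease(observed *lease, now time.Time) {
	if observed == nil {
		return
	}
	if h.lease == nil || h.lease.renewTime.Before(observed.renewTime) {
		h.lease = observed
		h.probeTimestamp = now
	}
}

// unhealthy reports whether the node has gone without any heartbeat (status
// or lease) for longer than the monitor grace period, i.e. the point at which
// the controller would post NodeReady==ConditionUnknown.
func (h *nodeHealth) unhealthy(now time.Time, gracePeriod time.Duration) bool {
	return now.After(h.probeTimestamp.Add(gracePeriod))
}

func main() {
	start := time.Now()
	h := &nodeHealth{probeTimestamp: start}
	grace := 40 * time.Second

	// No status update, but the kubelet keeps renewing its lease: still healthy.
	h.observeLease(&lease{renewTime: start.Add(30 * time.Second)}, start.Add(30*time.Second))
	fmt.Println(h.unhealthy(start.Add(60*time.Second), grace)) // false

	// Lease stops being renewed: after the grace period the node goes Unknown.
	fmt.Println(h.unhealthy(start.Add(2*time.Minute), grace)) // true
}

As the in-patch comment notes, a kubelet that never posts node status but keeps renewing its heartbeat lease is still considered healthy, so the lease check only ever moves the probe timestamp forward; it never shortens the grace window established by a recent NodeStatus update.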