From 15c9826061e15f6225082564b0904e56c21d0a41 Mon Sep 17 00:00:00 2001 From: bprashanth Date: Wed, 17 Aug 2016 15:33:35 -0700 Subject: [PATCH] Nodecontroller doesn't flip readiness on pods if kubeletVersion < 1.2.0 --- pkg/controller/node/controller_utils.go | 27 +++- pkg/controller/node/nodecontroller.go | 6 +- pkg/controller/node/nodecontroller_test.go | 136 +++++++++++++++++++++ 3 files changed, 167 insertions(+), 2 deletions(-) diff --git a/pkg/controller/node/controller_utils.go b/pkg/controller/node/controller_utils.go index 5af5c866bbb..7f4dfd60672 100644 --- a/pkg/controller/node/controller_utils.go +++ b/pkg/controller/node/controller_utils.go @@ -209,7 +209,15 @@ func (nc *NodeController) maybeDeleteTerminatingPod(obj interface{}) { // update ready status of all pods running on given node from master // return true if success -func markAllPodsNotReady(kubeClient clientset.Interface, nodeName string) error { +func markAllPodsNotReady(kubeClient clientset.Interface, node *api.Node) error { + // Don't set pods to NotReady if the kubelet is running a version that + // doesn't understand how to correct readiness. + // TODO: Remove this check when we no longer guarantee backward compatibility + // with node versions < 1.2.0. + if nodeRunningOutdatedKubelet(node) { + return nil + } + nodeName := node.Name glog.V(2).Infof("Update ready status of pods on node [%v]", nodeName) opts := api.ListOptions{FieldSelector: fields.OneTermEqualSelector(api.PodHostField, nodeName)} pods, err := kubeClient.Core().Pods(api.NamespaceAll).List(opts) @@ -243,6 +251,23 @@ func markAllPodsNotReady(kubeClient clientset.Interface, nodeName string) error return fmt.Errorf("%v", strings.Join(errMsg, "; ")) } +// nodeRunningOutdatedKubelet returns true if the kubeletVersion reported +// in the nodeInfo of the given node is "outdated", meaning < 1.2.0. +// Older versions were inflexible and modifying pod.Status directly through +// the apiserver would result in unexpected outcomes. +func nodeRunningOutdatedKubelet(node *api.Node) bool { + v, err := version.Parse(node.Status.NodeInfo.KubeletVersion) + if err != nil { + glog.Errorf("couldn't parse version %q of node %v", node.Status.NodeInfo.KubeletVersion, err) + return true + } + if podStatusReconciliationVersion.GT(v) { + glog.Infof("Node %v running kubelet at (%v) which is less than the minimum version that allows nodecontroller to mark pods NotReady (%v).", node.Name, v, podStatusReconciliationVersion) + return true + } + return false +} + func nodeExistsInCloudProvider(cloud cloudprovider.Interface, nodeName string) (bool, error) { instances, ok := cloud.Instances() if !ok { diff --git a/pkg/controller/node/nodecontroller.go b/pkg/controller/node/nodecontroller.go index 656e1742f02..0e5c2ff3fb4 100644 --- a/pkg/controller/node/nodecontroller.go +++ b/pkg/controller/node/nodecontroller.go @@ -51,6 +51,10 @@ import ( var ( ErrCloudInstance = errors.New("cloud provider doesn't support instances.") gracefulDeletionVersion = version.MustParse("v1.1.0") + + // The minimum kubelet version for which the nodecontroller + // can safely flip pod.Status to NotReady. + podStatusReconciliationVersion = version.MustParse("v1.2.0") ) const ( @@ -527,7 +531,7 @@ func (nc *NodeController) monitorNodeStatus() error { // Report node event. if currentReadyCondition.Status != api.ConditionTrue && observedReadyCondition.Status == api.ConditionTrue { recordNodeStatusChange(nc.recorder, node, "NodeNotReady") - if err = markAllPodsNotReady(nc.kubeClient, node.Name); err != nil { + if err = markAllPodsNotReady(nc.kubeClient, node); err != nil { utilruntime.HandleError(fmt.Errorf("Unable to mark all pods NotReady on node %v: %v", node.Name, err)) } } diff --git a/pkg/controller/node/nodecontroller_test.go b/pkg/controller/node/nodecontroller_test.go index 29f87b4abcd..3fa61a2affc 100644 --- a/pkg/controller/node/nodecontroller_test.go +++ b/pkg/controller/node/nodecontroller_test.go @@ -1398,6 +1398,9 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) { CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), }, Status: api.NodeStatus{ + NodeInfo: api.NodeSystemInfo{ + KubeletVersion: "v1.2.0", + }, Conditions: []api.NodeCondition{ { Type: api.NodeReady, @@ -1428,6 +1431,9 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) { }, timeToPass: 1 * time.Minute, newNodeStatus: api.NodeStatus{ + NodeInfo: api.NodeSystemInfo{ + KubeletVersion: "v1.2.0", + }, Conditions: []api.NodeCondition{ { Type: api.NodeReady, @@ -1451,6 +1457,76 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) { }, expectedPodStatusUpdate: true, }, + // Node created long time ago, with outdated kubelet version 1.1.0 and status + // updated by kubelet exceeds grace period. Expect no action from node controller. + { + fakeNodeHandler: &FakeNodeHandler{ + Existing: []*api.Node{ + { + ObjectMeta: api.ObjectMeta{ + Name: "node0", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: api.NodeStatus{ + NodeInfo: api.NodeSystemInfo{ + KubeletVersion: "v1.1.0", + }, + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionTrue, + // Node status hasn't been updated for 1hr. + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + { + Type: api.NodeOutOfDisk, + Status: api.ConditionFalse, + // Node status hasn't been updated for 1hr. + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + Capacity: api.ResourceList{ + api.ResourceName(api.ResourceCPU): resource.MustParse("10"), + api.ResourceName(api.ResourceMemory): resource.MustParse("10G"), + }, + }, + Spec: api.NodeSpec{ + ExternalID: "node0", + }, + }, + }, + Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}), + }, + timeToPass: 1 * time.Minute, + newNodeStatus: api.NodeStatus{ + NodeInfo: api.NodeSystemInfo{ + KubeletVersion: "v1.1.0", + }, + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionTrue, + // Node status hasn't been updated for 1hr. + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + { + Type: api.NodeOutOfDisk, + Status: api.ConditionFalse, + // Node status hasn't been updated for 1hr. + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + Capacity: api.ResourceList{ + api.ResourceName(api.ResourceCPU): resource.MustParse("10"), + api.ResourceName(api.ResourceMemory): resource.MustParse("10G"), + }, + }, + expectedPodStatusUpdate: false, + }, } for i, item := range table { @@ -1799,3 +1875,63 @@ func TestCleanupOrphanedPods(t *testing.T) { t.Fatalf("expected deleted pod name to be 'c', but got: %q", deletedPodName) } } + +func TestCheckNodeKubeletVersionParsing(t *testing.T) { + tests := []struct { + version string + outdated bool + }{ + { + version: "", + outdated: true, + }, + { + version: "v0.21.4", + outdated: true, + }, + { + version: "v1.0.0", + outdated: true, + }, + { + version: "v1.1.0", + outdated: true, + }, + { + version: "v1.1.0-alpha.2.961+9d4c6846fc03b9-dirty", + outdated: true, + }, + { + version: "v1.2.0", + outdated: false, + }, + { + version: "v1.3.3", + outdated: false, + }, + { + version: "v1.4.0-alpha.2.961+9d4c6846fc03b9-dirty", + outdated: false, + }, + { + version: "v2.0.0", + outdated: false, + }, + } + + for _, ov := range tests { + n := &api.Node{ + Status: api.NodeStatus{ + NodeInfo: api.NodeSystemInfo{ + KubeletVersion: ov.version, + }, + }, + } + isOutdated := nodeRunningOutdatedKubelet(n) + if ov.outdated != isOutdated { + t.Errorf("Version %v doesn't match test expectation. Expected outdated %v got %v", n.Status.NodeInfo.KubeletVersion, ov.outdated, isOutdated) + } else { + t.Logf("Version %v outdated %v", ov.version, isOutdated) + } + } +}