From 27d8a4feef65c126dbd033997ad73f38f258cf33 Mon Sep 17 00:00:00 2001 From: andrewsykim Date: Thu, 10 Nov 2016 13:09:27 -0500 Subject: [PATCH] set all node conditions to Unknown when node is unreachable --- pkg/controller/node/nodecontroller.go | 48 +++++++++++----------- pkg/controller/node/nodecontroller_test.go | 32 +++++++++++++++ 2 files changed, 57 insertions(+), 23 deletions(-) diff --git a/pkg/controller/node/nodecontroller.go b/pkg/controller/node/nodecontroller.go index ee16317a986..ae9fbfa90e2 100644 --- a/pkg/controller/node/nodecontroller.go +++ b/pkg/controller/node/nodecontroller.go @@ -743,35 +743,37 @@ func (nc *NodeController) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1. if observedReadyCondition.Status != v1.ConditionUnknown { currentReadyCondition.Status = v1.ConditionUnknown currentReadyCondition.Reason = "NodeStatusUnknown" - currentReadyCondition.Message = fmt.Sprintf("Kubelet stopped posting node status.") + currentReadyCondition.Message = "Kubelet stopped posting node status." // LastProbeTime is the last time we heard from kubelet. currentReadyCondition.LastHeartbeatTime = observedReadyCondition.LastHeartbeatTime currentReadyCondition.LastTransitionTime = nc.now() } } - // Like NodeReady condition, NodeOutOfDisk was last set longer ago than gracePeriod, so update - // it to Unknown (regardless of its current value) in the master. - // TODO(madhusudancs): Refactor this with readyCondition to remove duplicated code. - _, oodCondition := v1.GetNodeCondition(&node.Status, v1.NodeOutOfDisk) - if oodCondition == nil { - glog.V(2).Infof("Out of disk condition of node %v is never updated by kubelet", node.Name) - node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{ - Type: v1.NodeOutOfDisk, - Status: v1.ConditionUnknown, - Reason: "NodeStatusNeverUpdated", - Message: fmt.Sprintf("Kubelet never posted node status."), - LastHeartbeatTime: node.CreationTimestamp, - LastTransitionTime: nc.now(), - }) - } else { - glog.V(4).Infof("node %v hasn't been updated for %+v. Last out of disk condition is: %+v", - node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), oodCondition) - if oodCondition.Status != v1.ConditionUnknown { - oodCondition.Status = v1.ConditionUnknown - oodCondition.Reason = "NodeStatusUnknown" - oodCondition.Message = fmt.Sprintf("Kubelet stopped posting node status.") - oodCondition.LastTransitionTime = nc.now() + // remaining node conditions should also be set to Unknown + remainingNodeConditionTypes := []v1.NodeConditionType{v1.NodeOutOfDisk, v1.NodeMemoryPressure, v1.NodeDiskPressure} + nowTimestamp := nc.now() + for _, nodeConditionType := range remainingNodeConditionTypes { + _, currentCondition := v1.GetNodeCondition(&node.Status, nodeConditionType) + if currentCondition == nil { + glog.V(2).Infof("Condition %v of node %v was never updated by kubelet", nodeConditionType, node.Name) + node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{ + Type: nodeConditionType, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: node.CreationTimestamp, + LastTransitionTime: nowTimestamp, + }) + } else { + glog.V(4).Infof("node %v hasn't been updated for %+v. Last %v is: %+v", + node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), nodeConditionType, currentCondition) + if currentCondition.Status != v1.ConditionUnknown { + currentCondition.Status = v1.ConditionUnknown + currentCondition.Reason = "NodeStatusUnknown" + currentCondition.Message = "Kubelet stopped posting node status." + currentCondition.LastTransitionTime = nowTimestamp + } } } diff --git a/pkg/controller/node/nodecontroller_test.go b/pkg/controller/node/nodecontroller_test.go index a3946967980..26008d79e7b 100644 --- a/pkg/controller/node/nodecontroller_test.go +++ b/pkg/controller/node/nodecontroller_test.go @@ -1342,6 +1342,22 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) { LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), LastTransitionTime: fakeNow, }, + { + Type: v1.NodeMemoryPressure, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + LastTransitionTime: fakeNow, + }, + { + Type: v1.NodeDiskPressure, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + LastTransitionTime: fakeNow, + }, }, }, }, @@ -1451,6 +1467,22 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) { LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), LastTransitionTime: metav1.Time{Time: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)}, }, + { + Type: v1.NodeMemoryPressure, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), // should default to node creation time if condition was never updated + LastTransitionTime: metav1.Time{Time: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)}, + }, + { + Type: v1.NodeDiskPressure, + Status: v1.ConditionUnknown, + Reason: "NodeStatusNeverUpdated", + Message: "Kubelet never posted node status.", + LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), // should default to node creation time if condition was never updated + LastTransitionTime: metav1.Time{Time: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)}, + }, }, Capacity: v1.ResourceList{ v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),