set all node conditions to Unknown when node is unreachable

This commit is contained in:
andrewsykim 2016-11-10 13:09:27 -05:00
parent 05c2a0825e
commit 27d8a4feef
2 changed files with 57 additions and 23 deletions

View File

@ -743,35 +743,37 @@ func (nc *NodeController) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.
if observedReadyCondition.Status != v1.ConditionUnknown {
currentReadyCondition.Status = v1.ConditionUnknown
currentReadyCondition.Reason = "NodeStatusUnknown"
currentReadyCondition.Message = fmt.Sprintf("Kubelet stopped posting node status.")
currentReadyCondition.Message = "Kubelet stopped posting node status."
// LastProbeTime is the last time we heard from kubelet.
currentReadyCondition.LastHeartbeatTime = observedReadyCondition.LastHeartbeatTime
currentReadyCondition.LastTransitionTime = nc.now()
}
}
// Like NodeReady condition, NodeOutOfDisk was last set longer ago than gracePeriod, so update
// it to Unknown (regardless of its current value) in the master.
// TODO(madhusudancs): Refactor this with readyCondition to remove duplicated code.
_, oodCondition := v1.GetNodeCondition(&node.Status, v1.NodeOutOfDisk)
if oodCondition == nil {
glog.V(2).Infof("Out of disk condition of node %v is never updated by kubelet", node.Name)
node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{
Type: v1.NodeOutOfDisk,
Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated",
Message: fmt.Sprintf("Kubelet never posted node status."),
LastHeartbeatTime: node.CreationTimestamp,
LastTransitionTime: nc.now(),
})
} else {
glog.V(4).Infof("node %v hasn't been updated for %+v. Last out of disk condition is: %+v",
node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), oodCondition)
if oodCondition.Status != v1.ConditionUnknown {
oodCondition.Status = v1.ConditionUnknown
oodCondition.Reason = "NodeStatusUnknown"
oodCondition.Message = fmt.Sprintf("Kubelet stopped posting node status.")
oodCondition.LastTransitionTime = nc.now()
// remaining node conditions should also be set to Unknown
remainingNodeConditionTypes := []v1.NodeConditionType{v1.NodeOutOfDisk, v1.NodeMemoryPressure, v1.NodeDiskPressure}
nowTimestamp := nc.now()
for _, nodeConditionType := range remainingNodeConditionTypes {
_, currentCondition := v1.GetNodeCondition(&node.Status, nodeConditionType)
if currentCondition == nil {
glog.V(2).Infof("Condition %v of node %v was never updated by kubelet", nodeConditionType, node.Name)
node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{
Type: nodeConditionType,
Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated",
Message: "Kubelet never posted node status.",
LastHeartbeatTime: node.CreationTimestamp,
LastTransitionTime: nowTimestamp,
})
} else {
glog.V(4).Infof("node %v hasn't been updated for %+v. Last %v is: %+v",
node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), nodeConditionType, currentCondition)
if currentCondition.Status != v1.ConditionUnknown {
currentCondition.Status = v1.ConditionUnknown
currentCondition.Reason = "NodeStatusUnknown"
currentCondition.Message = "Kubelet stopped posting node status."
currentCondition.LastTransitionTime = nowTimestamp
}
}
}

View File

@ -1342,6 +1342,22 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
LastTransitionTime: fakeNow,
},
{
Type: v1.NodeMemoryPressure,
Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated",
Message: "Kubelet never posted node status.",
LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
LastTransitionTime: fakeNow,
},
{
Type: v1.NodeDiskPressure,
Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated",
Message: "Kubelet never posted node status.",
LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
LastTransitionTime: fakeNow,
},
},
},
},
@ -1451,6 +1467,22 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Time{Time: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)},
},
{
Type: v1.NodeMemoryPressure,
Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated",
Message: "Kubelet never posted node status.",
LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), // should default to node creation time if condition was never updated
LastTransitionTime: metav1.Time{Time: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)},
},
{
Type: v1.NodeDiskPressure,
Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated",
Message: "Kubelet never posted node status.",
LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), // should default to node creation time if condition was never updated
LastTransitionTime: metav1.Time{Time: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)},
},
},
Capacity: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),