Merge pull request #36592 from andrewsykim/36273-set-all-node-conditions-unknown-when-node-unreachable

Automatic merge from submit-queue (batch tested with PRs 40917, 41181, 41123, 36592, 41183)

Set all node conditions to Unknown when node is unreachable

**What this PR does / why we need it**:
Sets all node conditions to Unknown when a node does not report its status or is unreachable.

**Which issue this PR fixes**:
fixes https://github.com/kubernetes/kubernetes/issues/36273
This commit is contained in:
Kubernetes Submit Queue 2017-02-09 23:10:47 -08:00 committed by GitHub
commit 85b4d2e5cf
2 changed files with 57 additions and 23 deletions

View File

@ -742,35 +742,37 @@ func (nc *NodeController) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.
if observedReadyCondition.Status != v1.ConditionUnknown { if observedReadyCondition.Status != v1.ConditionUnknown {
currentReadyCondition.Status = v1.ConditionUnknown currentReadyCondition.Status = v1.ConditionUnknown
currentReadyCondition.Reason = "NodeStatusUnknown" currentReadyCondition.Reason = "NodeStatusUnknown"
currentReadyCondition.Message = fmt.Sprintf("Kubelet stopped posting node status.") currentReadyCondition.Message = "Kubelet stopped posting node status."
// LastProbeTime is the last time we heard from kubelet. // LastProbeTime is the last time we heard from kubelet.
currentReadyCondition.LastHeartbeatTime = observedReadyCondition.LastHeartbeatTime currentReadyCondition.LastHeartbeatTime = observedReadyCondition.LastHeartbeatTime
currentReadyCondition.LastTransitionTime = nc.now() currentReadyCondition.LastTransitionTime = nc.now()
} }
} }
// Like NodeReady condition, NodeOutOfDisk was last set longer ago than gracePeriod, so update // remaining node conditions should also be set to Unknown
// it to Unknown (regardless of its current value) in the master. remainingNodeConditionTypes := []v1.NodeConditionType{v1.NodeOutOfDisk, v1.NodeMemoryPressure, v1.NodeDiskPressure}
// TODO(madhusudancs): Refactor this with readyCondition to remove duplicated code. nowTimestamp := nc.now()
_, oodCondition := v1.GetNodeCondition(&node.Status, v1.NodeOutOfDisk) for _, nodeConditionType := range remainingNodeConditionTypes {
if oodCondition == nil { _, currentCondition := v1.GetNodeCondition(&node.Status, nodeConditionType)
glog.V(2).Infof("Out of disk condition of node %v is never updated by kubelet", node.Name) if currentCondition == nil {
glog.V(2).Infof("Condition %v of node %v was never updated by kubelet", nodeConditionType, node.Name)
node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{ node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{
Type: v1.NodeOutOfDisk, Type: nodeConditionType,
Status: v1.ConditionUnknown, Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated", Reason: "NodeStatusNeverUpdated",
Message: fmt.Sprintf("Kubelet never posted node status."), Message: "Kubelet never posted node status.",
LastHeartbeatTime: node.CreationTimestamp, LastHeartbeatTime: node.CreationTimestamp,
LastTransitionTime: nc.now(), LastTransitionTime: nowTimestamp,
}) })
} else { } else {
glog.V(4).Infof("node %v hasn't been updated for %+v. Last out of disk condition is: %+v", glog.V(4).Infof("node %v hasn't been updated for %+v. Last %v is: %+v",
node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), oodCondition) node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), nodeConditionType, currentCondition)
if oodCondition.Status != v1.ConditionUnknown { if currentCondition.Status != v1.ConditionUnknown {
oodCondition.Status = v1.ConditionUnknown currentCondition.Status = v1.ConditionUnknown
oodCondition.Reason = "NodeStatusUnknown" currentCondition.Reason = "NodeStatusUnknown"
oodCondition.Message = fmt.Sprintf("Kubelet stopped posting node status.") currentCondition.Message = "Kubelet stopped posting node status."
oodCondition.LastTransitionTime = nc.now() currentCondition.LastTransitionTime = nowTimestamp
}
} }
} }

View File

@ -1374,6 +1374,22 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
LastTransitionTime: fakeNow, LastTransitionTime: fakeNow,
}, },
{
Type: v1.NodeMemoryPressure,
Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated",
Message: "Kubelet never posted node status.",
LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
LastTransitionTime: fakeNow,
},
{
Type: v1.NodeDiskPressure,
Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated",
Message: "Kubelet never posted node status.",
LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
LastTransitionTime: fakeNow,
},
}, },
}, },
}, },
@ -1483,6 +1499,22 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Time{Time: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)}, LastTransitionTime: metav1.Time{Time: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)},
}, },
{
Type: v1.NodeMemoryPressure,
Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated",
Message: "Kubelet never posted node status.",
LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), // should default to node creation time if condition was never updated
LastTransitionTime: metav1.Time{Time: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)},
},
{
Type: v1.NodeDiskPressure,
Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated",
Message: "Kubelet never posted node status.",
LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), // should default to node creation time if condition was never updated
LastTransitionTime: metav1.Time{Time: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)},
},
}, },
Capacity: v1.ResourceList{ Capacity: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"), v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),