Merge pull request #49841 from dashpole/fix_gpu

Automatic merge from submit-queue (batch tested with PRs 49237, 49656, 49980, 49841, 49899)

[Bug Fix] Set NodeOODCondition to false

fixes #49839, which was introduced by #48846

This PR makes the kubelet set NodeOODCondition to false, so that the scheduler and other controllers do not consider the node to be unschedulable.
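For context, a NodeOutOfDisk condition left at Unknown is enough for other components to route work away from the node. A minimal sketch of that consumer-side rule (illustrative only: nodeLooksSchedulable is a hypothetical helper, not the actual scheduler or node-controller code, and the import path assumes an API release from that era, when v1.NodeOutOfDisk still existed):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// nodeLooksSchedulable mirrors the rule this PR cares about: anything
// other than OutOfDisk=False (i.e. True or Unknown) makes controllers
// treat the node as unusable.
func nodeLooksSchedulable(node *v1.Node) bool {
	for _, c := range node.Status.Conditions {
		if c.Type == v1.NodeOutOfDisk && c.Status != v1.ConditionFalse {
			return false
		}
	}
	return true
}

func main() {
	node := &v1.Node{Status: v1.NodeStatus{Conditions: []v1.NodeCondition{
		{Type: v1.NodeOutOfDisk, Status: v1.ConditionUnknown},
	}}}
	fmt.Println(nodeLooksSchedulable(node)) // false until the kubelet reports False
}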

/assign @vishh 
/sig node
/release-note-none
Author: Kubernetes Submit Queue
Date: 2017-08-02 19:11:15 -07:00 (committed by GitHub)
commit f6e2eabe2e
2 changed files with 64 additions and 0 deletions


@@ -852,6 +852,37 @@ func (kl *Kubelet) setNodeDiskPressureCondition(node *v1.Node) {
	}
}

// Set OODCondition for the node.
func (kl *Kubelet) setNodeOODCondition(node *v1.Node) {
	currentTime := metav1.NewTime(kl.clock.Now())
	var nodeOODCondition *v1.NodeCondition

	// Check if NodeOutOfDisk condition already exists and if it does, just pick it up for update.
	for i := range node.Status.Conditions {
		if node.Status.Conditions[i].Type == v1.NodeOutOfDisk {
			nodeOODCondition = &node.Status.Conditions[i]
		}
	}

	newOODCondition := nodeOODCondition == nil
	if newOODCondition {
		nodeOODCondition = &v1.NodeCondition{}
	}
	// Assign through the pointer so an existing Unknown condition is
	// updated in place in node.Status.Conditions, rather than replaced
	// by a fresh struct that is never written back.
	if newOODCondition || nodeOODCondition.Status == v1.ConditionUnknown {
		nodeOODCondition.Type = v1.NodeOutOfDisk
		nodeOODCondition.Status = v1.ConditionFalse
		nodeOODCondition.Reason = "KubeletHasSufficientDisk"
		nodeOODCondition.Message = "kubelet has sufficient disk space available"
		nodeOODCondition.LastTransitionTime = currentTime
	}

	// Update the heartbeat time irrespective of all the conditions.
	nodeOODCondition.LastHeartbeatTime = currentTime

	if newOODCondition {
		node.Status.Conditions = append(node.Status.Conditions, *nodeOODCondition)
	}
}

// Maintains Node.Spec.Unschedulable value from previous run of tryUpdateNodeStatus()
// TODO: why is this a package var?
var oldNodeUnschedulable bool
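To see the intended behavior in isolation, here is a standalone re-expression of the logic above (setOODFalse is a hypothetical free function, not the kubelet method), exercised against both a node with no OutOfDisk condition and one stuck at Unknown:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// setOODFalse mirrors setNodeOODCondition without the Kubelet receiver.
func setOODFalse(node *v1.Node, now metav1.Time) {
	var cond *v1.NodeCondition
	for i := range node.Status.Conditions {
		if node.Status.Conditions[i].Type == v1.NodeOutOfDisk {
			cond = &node.Status.Conditions[i]
		}
	}
	isNew := cond == nil
	if isNew {
		cond = &v1.NodeCondition{}
	}
	if isNew || cond.Status == v1.ConditionUnknown {
		cond.Type = v1.NodeOutOfDisk
		cond.Status = v1.ConditionFalse
		cond.Reason = "KubeletHasSufficientDisk"
		cond.Message = "kubelet has sufficient disk space available"
		cond.LastTransitionTime = now
	}
	cond.LastHeartbeatTime = now // heartbeat advances on every sync
	if isNew {
		node.Status.Conditions = append(node.Status.Conditions, *cond)
	}
}

func main() {
	now := metav1.Now()

	fresh := &v1.Node{}
	setOODFalse(fresh, now)
	fmt.Println(fresh.Status.Conditions[0].Status) // False: condition was appended

	stale := &v1.Node{Status: v1.NodeStatus{Conditions: []v1.NodeCondition{
		{Type: v1.NodeOutOfDisk, Status: v1.ConditionUnknown},
	}}}
	setOODFalse(stale, now)
	fmt.Println(stale.Status.Conditions[0].Status) // False: Unknown overwritten in place
}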
@@ -902,6 +933,7 @@ func (kl *Kubelet) defaultNodeStatusFuncs() []func(*v1.Node) error {
	return []func(*v1.Node) error{
		kl.setNodeAddress,
		withoutError(kl.setNodeStatusInfo),
		withoutError(kl.setNodeOODCondition),
		withoutError(kl.setNodeMemoryPressureCondition),
		withoutError(kl.setNodeDiskPressureCondition),
		withoutError(kl.setNodeReadyCondition),
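The new entry is wrapped in withoutError because this pipeline iterates over func(*v1.Node) error values. A sketch of that adapter, assuming it matches the helper used by the neighboring entries:

// Lifts a func(*v1.Node) into the error-returning shape the status
// pipeline expects, always reporting success.
func withoutError(f func(*v1.Node)) func(*v1.Node) error {
	return func(n *v1.Node) error {
		f(n)
		return nil
	}
}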


@@ -168,6 +168,14 @@ func TestUpdateNewNodeStatus(t *testing.T) {
		Spec: v1.NodeSpec{},
		Status: v1.NodeStatus{
			Conditions: []v1.NodeCondition{
				{
					Type:               v1.NodeOutOfDisk,
					Status:             v1.ConditionFalse,
					Reason:             "KubeletHasSufficientDisk",
					Message:            fmt.Sprintf("kubelet has sufficient disk space available"),
					LastHeartbeatTime:  metav1.Time{},
					LastTransitionTime: metav1.Time{},
				},
				{
					Type:   v1.NodeMemoryPressure,
					Status: v1.ConditionFalse,
@@ -267,6 +275,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
		Spec: v1.NodeSpec{},
		Status: v1.NodeStatus{
			Conditions: []v1.NodeCondition{
				{
					Type:               v1.NodeOutOfDisk,
					Status:             v1.ConditionFalse,
					Reason:             "KubeletHasSufficientDisk",
					Message:            fmt.Sprintf("kubelet has sufficient disk space available"),
					LastHeartbeatTime:  metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
					LastTransitionTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
				},
				{
					Type:   v1.NodeMemoryPressure,
					Status: v1.ConditionFalse,
@@ -326,6 +342,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
		Spec: v1.NodeSpec{},
		Status: v1.NodeStatus{
			Conditions: []v1.NodeCondition{
				{
					Type:               v1.NodeOutOfDisk,
					Status:             v1.ConditionFalse,
					Reason:             "KubeletHasSufficientDisk",
					Message:            fmt.Sprintf("kubelet has sufficient disk space available"),
					LastHeartbeatTime:  metav1.Time{},
					LastTransitionTime: metav1.Time{},
				},
				{
					Type:   v1.NodeMemoryPressure,
					Status: v1.ConditionFalse,
@@ -460,6 +484,14 @@ func TestUpdateNodeStatusWithRuntimeStateError(t *testing.T) {
		Spec: v1.NodeSpec{},
		Status: v1.NodeStatus{
			Conditions: []v1.NodeCondition{
				{
					Type:               v1.NodeOutOfDisk,
					Status:             v1.ConditionFalse,
					Reason:             "KubeletHasSufficientDisk",
					Message:            fmt.Sprintf("kubelet has sufficient disk space available"),
					LastHeartbeatTime:  metav1.Time{},
					LastTransitionTime: metav1.Time{},
				},
				{
					Type:   v1.NodeMemoryPressure,
					Status: v1.ConditionFalse,
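Each test fixture gains the same expected entry, NodeOutOfDisk=False with zeroed or fixed timestamps, which presumably only compares cleanly because the tests normalize time fields before the comparison. A hedged sketch of such a normalization helper (hypothetical, not taken from these tests; assumes the same v1 and metav1 imports as the fixtures above):

// zeroTimes blanks the time fields so a reflect.DeepEqual against
// fixtures like the ones above can pass regardless of when the
// kubelet happened to sync.
func zeroTimes(conds []v1.NodeCondition) {
	for i := range conds {
		conds[i].LastHeartbeatTime = metav1.Time{}
		conds[i].LastTransitionTime = metav1.Time{}
	}
}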