From 9c4424f0bd55b8f2fb4610a4616fbd5cd30abc9a Mon Sep 17 00:00:00 2001
From: "Madhusudan.C.S"
Date: Thu, 22 Oct 2015 12:14:56 -0700
Subject: [PATCH] Report out of disk as a node condition when the node goes out of disk.

Define a new OutOfDisk node condition and use it to report when the node
goes out of disk.

Make a copy of the loop range variable in the node listers so that a
pointer to it remains valid outside the for loop.

Also update/implement unit tests.
---
 pkg/api/types.go            |   3 +
 pkg/api/v1/types.go         |   3 +
 pkg/client/cache/listers.go |   4 +-
 pkg/kubelet/kubelet.go      |  57 +++++++++++++++
 pkg/kubelet/kubelet_test.go | 135 ++++++++++++++++++++++++++++++------
 5 files changed, 178 insertions(+), 24 deletions(-)

diff --git a/pkg/api/types.go b/pkg/api/types.go
index 1c4f6b6612d..4ba8dafce8d 100644
--- a/pkg/api/types.go
+++ b/pkg/api/types.go
@@ -1491,6 +1491,9 @@ type NodeConditionType string
 const (
 	// NodeReady means kubelet is healthy and ready to accept pods.
 	NodeReady NodeConditionType = "Ready"
+	// NodeOutOfDisk means the kubelet will not accept new pods due to insufficient free disk
+	// space on the node.
+	NodeOutOfDisk NodeConditionType = "OutOfDisk"
 )
 
 type NodeCondition struct {
diff --git a/pkg/api/v1/types.go b/pkg/api/v1/types.go
index 20f6d11ebc6..0eccf7750e8 100644
--- a/pkg/api/v1/types.go
+++ b/pkg/api/v1/types.go
@@ -1868,6 +1868,9 @@ type NodeConditionType string
 const (
 	// NodeReady means kubelet is healthy and ready to accept pods.
 	NodeReady NodeConditionType = "Ready"
+	// NodeOutOfDisk means the kubelet will not accept new pods due to insufficient free disk
+	// space on the node.
+	NodeOutOfDisk NodeConditionType = "OutOfDisk"
 )
 
 // NodeCondition contains condition infromation for a node.
diff --git a/pkg/client/cache/listers.go b/pkg/client/cache/listers.go
index f310459b8db..36ee48d89b0 100644
--- a/pkg/client/cache/listers.go
+++ b/pkg/client/cache/listers.go
@@ -136,7 +136,9 @@ func (s storeToNodeConditionLister) List() (nodes api.NodeList, err error) {
 		// Get the last condition of the required type
 		for _, cond := range node.Status.Conditions {
 			if cond.Type == s.conditionType {
-				nodeCondition = &cond
+				condCopy := cond
+				nodeCondition = &condCopy
+				break
 			} else {
 				glog.V(4).Infof("Ignoring condition type %v for node %v", cond.Type, node.Name)
 			}
diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index b30aacbfbe3..2783dfa8ff5 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -2389,6 +2389,8 @@ func (kl *Kubelet) syncNetworkStatus() {
 
 // setNodeStatus fills in the Status fields of the given Node, overwriting
 // any fields that are currently set.
+// TODO(madhusudancs): Simplify the logic for setting node conditions and
+// refactor the node status condition code out to a different file.
 func (kl *Kubelet) setNodeStatus(node *api.Node) error {
 	// Set addresses for the node.
 	if kl.cloud != nil {
@@ -2549,6 +2551,61 @@ func (kl *Kubelet) setNodeStatus(node *api.Node) error {
 			kl.recordNodeStatusEvent("NodeNotReady")
 		}
 	}
+
+	var nodeOODCondition *api.NodeCondition
+
+	// Check if the NodeOutOfDisk condition already exists and, if it does, pick it up for update.
+	for i := range node.Status.Conditions {
+		if node.Status.Conditions[i].Type == api.NodeOutOfDisk {
+			nodeOODCondition = &node.Status.Conditions[i]
+		}
+	}
+
+	newOODCondition := false
+	// If the NodeOutOfDisk condition doesn't exist, create one.
+	if nodeOODCondition == nil {
+		nodeOODCondition = &api.NodeCondition{
+			Type:               api.NodeOutOfDisk,
+			Status:             api.ConditionUnknown,
+			LastTransitionTime: currentTime,
+		}
+		// nodeOODCondition cannot be appended to node.Status.Conditions here because it gets
+		// copied into the slice. So if we appended nodeOODCondition to the slice here, none of
+		// the updates we make to nodeOODCondition below would be reflected in the slice.
+		newOODCondition = true
+	}
+
+	// Update the heartbeat time irrespective of the outcome of the conditions below.
+	nodeOODCondition.LastHeartbeatTime = currentTime
+
+	// Note: The branches below handle both the case when a new NodeOutOfDisk condition is
+	// created and the case when the condition already exists. When a new condition is
+	// created, its status is set to api.ConditionUnknown, which matches either
+	// nodeOODCondition.Status != api.ConditionTrue or
+	// nodeOODCondition.Status != api.ConditionFalse below, depending on whether the
+	// kubelet is out of disk or not.
+	if kl.isOutOfDisk() {
+		if nodeOODCondition.Status != api.ConditionTrue {
+			nodeOODCondition.Status = api.ConditionTrue
+			nodeOODCondition.Reason = "KubeletOutOfDisk"
+			nodeOODCondition.Message = "out of disk space"
+			nodeOODCondition.LastTransitionTime = currentTime
+			kl.recordNodeStatusEvent("NodeOutOfDisk")
+		}
+	} else {
+		if nodeOODCondition.Status != api.ConditionFalse {
+			nodeOODCondition.Status = api.ConditionFalse
+			nodeOODCondition.Reason = "KubeletHasSufficientDisk"
+			nodeOODCondition.Message = "kubelet has sufficient disk space available"
+			nodeOODCondition.LastTransitionTime = currentTime
+			kl.recordNodeStatusEvent("NodeHasSufficientDisk")
+		}
+	}
+
+	if newOODCondition {
+		node.Status.Conditions = append(node.Status.Conditions, *nodeOODCondition)
+	}
+
 	if oldNodeUnschedulable != node.Spec.Unschedulable {
 		if node.Spec.Unschedulable {
 			kl.recordNodeStatusEvent("NodeNotSchedulable")
diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go
index 7e2a45a5fe2..d0e6727e160 100644
--- a/pkg/kubelet/kubelet_test.go
+++ b/pkg/kubelet/kubelet_test.go
@@ -2504,6 +2504,23 @@ func TestUpdateNewNodeStatus(t *testing.T) {
 		DockerVersion: "1.5.0",
 	}
 	mockCadvisor.On("VersionInfo").Return(versionInfo, nil)
+
+	// Create a new DiskSpaceManager with a new policy. This new manager along with the mock
+	// FsInfo values added to Cadvisor should make the kubelet report that it has sufficient
+	// disk space.
+	dockerimagesFsInfo := cadvisorapiv2.FsInfo{Capacity: 500 * mb, Available: 200 * mb}
+	rootFsInfo := cadvisorapiv2.FsInfo{Capacity: 500 * mb, Available: 200 * mb}
+	mockCadvisor.On("DockerImagesFsInfo").Return(dockerimagesFsInfo, nil)
+	mockCadvisor.On("RootFsInfo").Return(rootFsInfo, nil)
+
+	dsp := DiskSpacePolicy{DockerFreeDiskMB: 100, RootFreeDiskMB: 100}
+	diskSpaceManager, err := newDiskSpaceManager(mockCadvisor, dsp)
+	if err != nil {
+		t.Fatalf("can't create disk space manager: %v", err)
+	}
+	diskSpaceManager.Unfreeze()
+	kubelet.diskSpaceManager = diskSpaceManager
+
 	expectedNode := &api.Node{
 		ObjectMeta: api.ObjectMeta{Name: testKubeletHostname},
 		Spec:       api.NodeSpec{},
@@ -2517,6 +2534,14 @@ func TestUpdateNewNodeStatus(t *testing.T) {
 					LastHeartbeatTime:  unversioned.Time{},
 					LastTransitionTime: unversioned.Time{},
 				},
+				{
+					Type:               api.NodeOutOfDisk,
+					Status:             api.ConditionFalse,
+					Reason:             "KubeletHasSufficientDisk",
+					Message:            "kubelet has sufficient disk space available",
+					LastHeartbeatTime:  unversioned.Time{},
+					LastTransitionTime: unversioned.Time{},
+				},
 			},
 			NodeInfo: api.NodeSystemInfo{
 				MachineID: "123",
@@ -2555,14 +2580,17 @@ func TestUpdateNewNodeStatus(t *testing.T) {
 	if !ok {
 		t.Errorf("unexpected object type")
 	}
-	if updatedNode.Status.Conditions[0].LastHeartbeatTime.IsZero() {
-		t.Errorf("unexpected zero last probe timestamp")
+	for i, cond := range updatedNode.Status.Conditions {
+		if cond.LastHeartbeatTime.IsZero() {
+			t.Errorf("unexpected zero last probe timestamp for %v condition", cond.Type)
+		}
+		if cond.LastTransitionTime.IsZero() {
+			t.Errorf("unexpected zero last transition timestamp for %v condition", cond.Type)
+		}
+		updatedNode.Status.Conditions[i].LastHeartbeatTime = unversioned.Time{}
+		updatedNode.Status.Conditions[i].LastTransitionTime = unversioned.Time{}
 	}
-	if updatedNode.Status.Conditions[0].LastTransitionTime.IsZero() {
-		t.Errorf("unexpected zero last transition timestamp")
-	}
-	updatedNode.Status.Conditions[0].LastHeartbeatTime = unversioned.Time{}
-	updatedNode.Status.Conditions[0].LastTransitionTime = unversioned.Time{}
+
 	if !reflect.DeepEqual(expectedNode, updatedNode) {
 		t.Errorf("unexpected objects: %s", util.ObjectDiff(expectedNode, updatedNode))
 	}
@@ -2586,6 +2614,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
 					LastHeartbeatTime:  unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
 					LastTransitionTime: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
 				},
+				{
+					Type:               api.NodeOutOfDisk,
+					Status:             api.ConditionTrue,
+					Reason:             "KubeletOutOfDisk",
+					Message:            "out of disk space",
+					LastHeartbeatTime:  unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+					LastTransitionTime: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+				},
 			},
 			Capacity: api.ResourceList{
 				api.ResourceCPU:    *resource.NewMilliQuantity(3000, resource.DecimalSI),
@@ -2610,6 +2646,22 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
 		DockerVersion: "1.5.0",
 	}
 	mockCadvisor.On("VersionInfo").Return(versionInfo, nil)
+
+	// Create a new DiskSpaceManager with a new policy. This new manager along with the mock FsInfo
+	// values added to Cadvisor should make the kubelet report that it is out of disk space.
+	dockerimagesFsInfo := cadvisorapiv2.FsInfo{Capacity: 500 * mb, Available: 70 * mb}
+	rootFsInfo := cadvisorapiv2.FsInfo{Capacity: 500 * mb, Available: 50 * mb}
+	mockCadvisor.On("DockerImagesFsInfo").Return(dockerimagesFsInfo, nil)
+	mockCadvisor.On("RootFsInfo").Return(rootFsInfo, nil)
+
+	dsp := DiskSpacePolicy{DockerFreeDiskMB: 100, RootFreeDiskMB: 100}
+	diskSpaceManager, err := newDiskSpaceManager(mockCadvisor, dsp)
+	if err != nil {
+		t.Fatalf("can't create disk space manager: %v", err)
+	}
+	diskSpaceManager.Unfreeze()
+	kubelet.diskSpaceManager = diskSpaceManager
+
 	expectedNode := &api.Node{
 		ObjectMeta: api.ObjectMeta{Name: testKubeletHostname},
 		Spec:       api.NodeSpec{},
@@ -2623,6 +2675,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
 					LastHeartbeatTime:  unversioned.Time{}, // placeholder
 					LastTransitionTime: unversioned.Time{}, // placeholder
 				},
+				{
+					Type:               api.NodeOutOfDisk,
+					Status:             api.ConditionTrue,
+					Reason:             "KubeletOutOfDisk",
+					Message:            "out of disk space",
+					LastHeartbeatTime:  unversioned.Time{}, // placeholder
+					LastTransitionTime: unversioned.Time{}, // placeholder
+				},
 			},
 			NodeInfo: api.NodeSystemInfo{
 				MachineID: "123",
@@ -2662,16 +2722,18 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
 	if !ok {
 		t.Errorf("unexpected object type")
 	}
-	// Expect LastProbeTime to be updated to Now, while LastTransitionTime to be the same.
-	if reflect.DeepEqual(updatedNode.Status.Conditions[0].LastHeartbeatTime.Rfc3339Copy().UTC(), unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC).Time) {
-		t.Errorf("expected \n%v\n, got \n%v", unversioned.Now(), unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC))
+	for i, cond := range updatedNode.Status.Conditions {
+		// Expect LastProbeTime to be updated to Now, while LastTransitionTime to be the same.
+		if old := unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC).Time; reflect.DeepEqual(cond.LastHeartbeatTime.Rfc3339Copy().UTC(), old) {
+			t.Errorf("Condition %v LastProbeTime: expected \n%v\n, got \n%v", cond.Type, unversioned.Now(), old)
+		}
+		if got, want := cond.LastTransitionTime.Rfc3339Copy().UTC(), unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC).Time; !reflect.DeepEqual(got, want) {
+			t.Errorf("Condition %v LastTransitionTime: expected \n%#v\n, got \n%#v", cond.Type, want, got)
+		}
+		updatedNode.Status.Conditions[i].LastHeartbeatTime = unversioned.Time{}
+		updatedNode.Status.Conditions[i].LastTransitionTime = unversioned.Time{}
 	}
-	if !reflect.DeepEqual(updatedNode.Status.Conditions[0].LastTransitionTime.Rfc3339Copy().UTC(), unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC).Time) {
-		t.Errorf("expected \n%#v\n, got \n%#v", updatedNode.Status.Conditions[0].LastTransitionTime.Rfc3339Copy(),
-			unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC))
-	}
-	updatedNode.Status.Conditions[0].LastHeartbeatTime = unversioned.Time{}
-	updatedNode.Status.Conditions[0].LastTransitionTime = unversioned.Time{}
+
 	if !reflect.DeepEqual(expectedNode, updatedNode) {
 		t.Errorf("expected \n%v\n, got \n%v", expectedNode, updatedNode)
 	}
@@ -2705,6 +2767,22 @@ func TestUpdateNodeStatusWithoutContainerRuntime(t *testing.T) {
 	}
 	mockCadvisor.On("VersionInfo").Return(versionInfo, nil)
 
+	// Create a new DiskSpaceManager with a new policy. This new manager along with the
+	// mock FsInfo values assigned to Cadvisor should make the kubelet report that it has
+	// sufficient disk space.
+	dockerimagesFsInfo := cadvisorapiv2.FsInfo{Capacity: 500 * mb, Available: 200 * mb}
+	rootFsInfo := cadvisorapiv2.FsInfo{Capacity: 500 * mb, Available: 200 * mb}
+	mockCadvisor.On("DockerImagesFsInfo").Return(dockerimagesFsInfo, nil)
+	mockCadvisor.On("RootFsInfo").Return(rootFsInfo, nil)
+
+	dsp := DiskSpacePolicy{DockerFreeDiskMB: 100, RootFreeDiskMB: 100}
+	diskSpaceManager, err := newDiskSpaceManager(mockCadvisor, dsp)
+	if err != nil {
+		t.Fatalf("can't create disk space manager: %v", err)
+	}
+	diskSpaceManager.Unfreeze()
+	kubelet.diskSpaceManager = diskSpaceManager
+
 	expectedNode := &api.Node{
 		ObjectMeta: api.ObjectMeta{Name: testKubeletHostname},
 		Spec:       api.NodeSpec{},
@@ -2718,6 +2796,14 @@ func TestUpdateNodeStatusWithoutContainerRuntime(t *testing.T) {
 					LastHeartbeatTime:  unversioned.Time{},
 					LastTransitionTime: unversioned.Time{},
 				},
+				{
+					Type:               api.NodeOutOfDisk,
+					Status:             api.ConditionFalse,
+					Reason:             "KubeletHasSufficientDisk",
+					Message:            "kubelet has sufficient disk space available",
+					LastHeartbeatTime:  unversioned.Time{},
+					LastTransitionTime: unversioned.Time{},
+				},
 			},
 			NodeInfo: api.NodeSystemInfo{
 				MachineID: "123",
@@ -2758,14 +2844,17 @@ func TestUpdateNodeStatusWithoutContainerRuntime(t *testing.T) {
 		t.Errorf("unexpected action type. expected UpdateAction, got %#v", actions[1])
 	}
 
-	if updatedNode.Status.Conditions[0].LastHeartbeatTime.IsZero() {
-		t.Errorf("unexpected zero last probe timestamp")
+	for i, cond := range updatedNode.Status.Conditions {
+		if cond.LastHeartbeatTime.IsZero() {
+			t.Errorf("unexpected zero last probe timestamp for %v condition", cond.Type)
+		}
+		if cond.LastTransitionTime.IsZero() {
+			t.Errorf("unexpected zero last transition timestamp for %v condition", cond.Type)
+		}
+		updatedNode.Status.Conditions[i].LastHeartbeatTime = unversioned.Time{}
+		updatedNode.Status.Conditions[i].LastTransitionTime = unversioned.Time{}
 	}
-	if updatedNode.Status.Conditions[0].LastTransitionTime.IsZero() {
-		t.Errorf("unexpected zero last transition timestamp")
-	}
-	updatedNode.Status.Conditions[0].LastHeartbeatTime = unversioned.Time{}
-	updatedNode.Status.Conditions[0].LastTransitionTime = unversioned.Time{}
+
	if !reflect.DeepEqual(expectedNode, updatedNode) {
 		t.Errorf("unexpected objects: %s", util.ObjectDiff(expectedNode, updatedNode))
 	}
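
Note on the pkg/client/cache/listers.go hunk above: before Go 1.22, the
variable in a "for ... range" clause is a single location reused on every
iteration, so taking its address captures whatever the loop writes there
last, not the element that matched. A minimal standalone illustration of
the pitfall and of the copy-based fix the patch applies (hypothetical
types and names, not code from this patch):

    package main

    import "fmt"

    type condition struct{ name string }

    func main() {
        conds := []condition{{"Ready"}, {"OutOfDisk"}}

        var picked *condition
        for _, cond := range conds {
            if cond.name == "Ready" {
                picked = &cond // BUG (pre-Go 1.22): &cond aliases the reused loop variable.
            }
        }
        fmt.Println(picked.name) // prints "OutOfDisk": the loop variable was overwritten

        for _, cond := range conds {
            if cond.name == "Ready" {
                condCopy := cond   // copy the element, as the patch does,
                picked = &condCopy // so the pointer survives later iterations
            }
        }
        fmt.Println(picked.name) // prints "Ready"
    }

Go 1.22 later made range variables per-iteration, but on the Go versions
contemporary with this patch the explicit copy is required.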
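
The comment in setNodeStatus about deferring the append is worth spelling
out: append stores a copy of the struct value, so mutations made afterwards
through a pointer to the original never reach the slice element. A small
sketch of the failure mode the patch avoids, using a simplified stand-in
type rather than the real api.NodeCondition:

    package main

    import "fmt"

    type nodeCondition struct {
        Type   string
        Status string
    }

    func main() {
        var conds []nodeCondition

        c := &nodeCondition{Type: "OutOfDisk"}
        conds = append(conds, *c)           // append copies the struct value into the slice...
        c.Status = "True"                   // ...so this mutates only the local struct
        fmt.Printf("%q\n", conds[0].Status) // "" - the update was lost

        // The pattern setNodeStatus uses: finish all mutations first, append last.
        conds = conds[:0]
        c2 := nodeCondition{Type: "OutOfDisk"}
        c2.Status = "True"
        conds = append(conds, c2)
        fmt.Printf("%q\n", conds[0].Status) // "True"
    }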
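
The test fixtures all follow the same arithmetic: a DiskSpacePolicy
demanding 100 MB free on both the Docker image filesystem and the root
filesystem, checked against the Available bytes that cAdvisor reports.
200 MB free on a 500 MB volume clears that floor; 70 MB and 50 MB do not.
The sketch below is a hypothetical reduction of that check, assuming
isOutOfDisk boils down to a threshold comparison; the real diskSpaceManager
also caches results and supports freezing/unfreezing:

    package main

    import "fmt"

    const mb = 1024 * 1024

    // fsInfo mirrors the cadvisor v2 FsInfo fields the tests populate.
    type fsInfo struct {
        Capacity  uint64
        Available uint64
    }

    // diskSpacePolicy mirrors the thresholds the tests configure, in megabytes.
    type diskSpacePolicy struct {
        DockerFreeDiskMB uint64
        RootFreeDiskMB   uint64
    }

    // isOutOfDisk (assumed simplification): out of disk when either
    // filesystem has less free space than its policy floor.
    func isOutOfDisk(docker, root fsInfo, p diskSpacePolicy) bool {
        return docker.Available < p.DockerFreeDiskMB*mb || root.Available < p.RootFreeDiskMB*mb
    }

    func main() {
        p := diskSpacePolicy{DockerFreeDiskMB: 100, RootFreeDiskMB: 100}

        // TestUpdateNewNodeStatus values: 200 MB free on both -> sufficient (false).
        fmt.Println(isOutOfDisk(fsInfo{500 * mb, 200 * mb}, fsInfo{500 * mb, 200 * mb}, p))

        // TestUpdateExistingNodeStatus values: 70 MB and 50 MB free -> out of disk (true).
        fmt.Println(isOutOfDisk(fsInfo{500 * mb, 70 * mb}, fsInfo{500 * mb, 50 * mb}, p))
    }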