Merge pull request #16178 from madhusudancs/kubelet-out-of-disk-no-master

Report node out of disk condition in the kubelet.
Daniel Smith 2015-10-27 14:28:03 -07:00
commit 8524d392bf
5 changed files with 178 additions and 24 deletions


@@ -1491,6 +1491,9 @@ type NodeConditionType string
 const (
     // NodeReady means kubelet is healthy and ready to accept pods.
     NodeReady NodeConditionType = "Ready"
+    // NodeOutOfDisk means the kubelet will not accept new pods due to insufficient free disk
+    // space on the node.
+    NodeOutOfDisk NodeConditionType = "OutOfDisk"
 )
 
 type NodeCondition struct {


@@ -1868,6 +1868,9 @@ type NodeConditionType string
 const (
     // NodeReady means kubelet is healthy and ready to accept pods.
     NodeReady NodeConditionType = "Ready"
+    // NodeOutOfDisk means the kubelet will not accept new pods due to insufficient free disk
+    // space on the node.
+    NodeOutOfDisk NodeConditionType = "OutOfDisk"
 )
 
 // NodeCondition contains condition information for a node.
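Both the internal and versioned API types gain the same constant, so consumers can act on the new condition once it round-trips through conversion. As a minimal sketch of how a client might read it off a node object (the helper IsNodeOutOfDisk and its package are illustrative, not part of this commit; the import path assumes the repo layout of this era):

package example

import "k8s.io/kubernetes/pkg/api"

// IsNodeOutOfDisk reports whether the node currently carries a NodeOutOfDisk
// condition with status true. Absence of the condition is treated as "not
// out of disk", since older kubelets never report it.
func IsNodeOutOfDisk(node *api.Node) bool {
    for _, cond := range node.Status.Conditions {
        if cond.Type == api.NodeOutOfDisk {
            return cond.Status == api.ConditionTrue
        }
    }
    return false
}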


@@ -136,7 +136,9 @@ func (s storeToNodeConditionLister) List() (nodes api.NodeList, err error) {
         // Get the last condition of the required type
         for _, cond := range node.Status.Conditions {
             if cond.Type == s.conditionType {
-                nodeCondition = &cond
+                condCopy := cond
+                nodeCondition = &condCopy
+                break
             } else {
                 glog.V(4).Infof("Ignoring condition type %v for node %v", cond.Type, node.Name)
             }
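The one-line fix above addresses the classic Go range-variable pitfall: `&cond` takes the address of the loop variable itself, which (before Go 1.22 changed loop semantics) is a single variable reused on every iteration, so a stored pointer can later observe a different element than the one that matched. A self-contained sketch of the failure mode and the copy-first fix; the Condition type here is a stand-in, not the Kubernetes one:

package main

import "fmt"

type Condition struct{ Type string }

func main() {
    conds := []Condition{{"Ready"}, {"OutOfDisk"}, {"Schedulable"}}

    // Buggy: every appended pointer aliases the same loop variable, so
    // (pre-Go 1.22) all of them end up seeing the final element.
    var buggy []*Condition
    for _, cond := range conds {
        buggy = append(buggy, &cond)
    }

    // Fixed, mirroring the condCopy pattern above: copy the element into a
    // fresh variable and take the address of the copy.
    var fixed []*Condition
    for _, cond := range conds {
        condCopy := cond
        fixed = append(fixed, &condCopy)
    }

    fmt.Println(buggy[0].Type, buggy[1].Type) // pre-Go 1.22: "Schedulable Schedulable"
    fmt.Println(fixed[0].Type, fixed[1].Type) // "Ready OutOfDisk"
}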


@@ -2389,6 +2389,8 @@ func (kl *Kubelet) syncNetworkStatus() {
 // setNodeStatus fills in the Status fields of the given Node, overwriting
 // any fields that are currently set.
+// TODO(madhusudancs): Simplify the logic for setting node conditions and
+// refactor the node status condition code out to a different file.
 func (kl *Kubelet) setNodeStatus(node *api.Node) error {
     // Set addresses for the node.
     if kl.cloud != nil {
@@ -2549,6 +2551,61 @@ func (kl *Kubelet) setNodeStatus(node *api.Node) error {
             kl.recordNodeStatusEvent("NodeNotReady")
         }
     }
+
+    var nodeOODCondition *api.NodeCondition
+
+    // Check if NodeOutOfDisk condition already exists and if it does, just pick it up for update.
+    for i := range node.Status.Conditions {
+        if node.Status.Conditions[i].Type == api.NodeOutOfDisk {
+            nodeOODCondition = &node.Status.Conditions[i]
+        }
+    }
+
+    newOODCondition := false
+    // If the NodeOutOfDisk condition doesn't exist, create one.
+    if nodeOODCondition == nil {
+        nodeOODCondition = &api.NodeCondition{
+            Type:               api.NodeOutOfDisk,
+            Status:             api.ConditionUnknown,
+            LastTransitionTime: currentTime,
+        }
+        // nodeOODCondition cannot be appended to node.Status.Conditions here because it gets
+        // copied to the slice. So if we append nodeOODCondition to the slice here, none of the
+        // updates we make to nodeOODCondition below are reflected in the slice.
+        newOODCondition = true
+    }
+
+    // Update the heartbeat time irrespective of all the conditions.
+    nodeOODCondition.LastHeartbeatTime = currentTime
+
+    // Note: The conditions below take care of the case when a new NodeOutOfDisk condition is
+    // created, as well as the case when the condition already exists. When a new condition
+    // is created its status is set to api.ConditionUnknown, which matches either
+    // nodeOODCondition.Status != api.ConditionTrue or
+    // nodeOODCondition.Status != api.ConditionFalse in the conditions below, depending on
+    // whether the kubelet is out of disk or not.
+    if kl.isOutOfDisk() {
+        if nodeOODCondition.Status != api.ConditionTrue {
+            nodeOODCondition.Status = api.ConditionTrue
+            nodeOODCondition.Reason = "KubeletOutOfDisk"
+            nodeOODCondition.Message = "out of disk space"
+            nodeOODCondition.LastTransitionTime = currentTime
+            kl.recordNodeStatusEvent("NodeOutOfDisk")
+        }
+    } else {
+        if nodeOODCondition.Status != api.ConditionFalse {
+            nodeOODCondition.Status = api.ConditionFalse
+            nodeOODCondition.Reason = "KubeletHasSufficientDisk"
+            nodeOODCondition.Message = "kubelet has sufficient disk space available"
+            nodeOODCondition.LastTransitionTime = currentTime
+            kl.recordNodeStatusEvent("NodeHasSufficientDisk")
+        }
+    }
+
+    if newOODCondition {
+        node.Status.Conditions = append(node.Status.Conditions, *nodeOODCondition)
+    }
+
 if oldNodeUnschedulable != node.Spec.Unschedulable {
     if node.Spec.Unschedulable {
         kl.recordNodeStatusEvent("NodeNotSchedulable")


@@ -2507,6 +2507,23 @@ func TestUpdateNewNodeStatus(t *testing.T) {
         DockerVersion: "1.5.0",
     }
     mockCadvisor.On("VersionInfo").Return(versionInfo, nil)
+
+    // Create a new DiskSpaceManager with a new policy. This new manager along with the mock
+    // FsInfo values added to Cadvisor should make the kubelet report that it has sufficient
+    // disk space.
+    dockerimagesFsInfo := cadvisorapiv2.FsInfo{Capacity: 500 * mb, Available: 200 * mb}
+    rootFsInfo := cadvisorapiv2.FsInfo{Capacity: 500 * mb, Available: 200 * mb}
+    mockCadvisor.On("DockerImagesFsInfo").Return(dockerimagesFsInfo, nil)
+    mockCadvisor.On("RootFsInfo").Return(rootFsInfo, nil)
+    dsp := DiskSpacePolicy{DockerFreeDiskMB: 100, RootFreeDiskMB: 100}
+    diskSpaceManager, err := newDiskSpaceManager(mockCadvisor, dsp)
+    if err != nil {
+        t.Fatalf("can't update disk space manager: %v", err)
+    }
+    diskSpaceManager.Unfreeze()
+    kubelet.diskSpaceManager = diskSpaceManager
+
 expectedNode := &api.Node{
     ObjectMeta: api.ObjectMeta{Name: testKubeletHostname},
     Spec:       api.NodeSpec{},
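With 200 MB available on both filesystems against 100 MB floors, the kubelet should report sufficient disk; the second test below flips this by dropping Available under the floor. A rough sketch of the comparison these fixtures imply, assuming the policy fields are plain megabyte floors (type and function names here are illustrative, not the kubelet's; the real check lives in its diskSpaceManager):

package main

import "fmt"

const mb = 1024 * 1024

// fsInfo mirrors the two cadvisor fields the tests stub out; illustrative only.
type fsInfo struct {
    Capacity  uint64
    Available uint64
}

// outOfDisk: a filesystem is out of disk once its available bytes fall
// below the configured floor in megabytes.
func outOfDisk(fs fsInfo, freeDiskMB uint64) bool {
    return fs.Available < freeDiskMB*mb
}

func main() {
    sufficient := fsInfo{Capacity: 500 * mb, Available: 200 * mb}
    insufficient := fsInfo{Capacity: 500 * mb, Available: 50 * mb}

    fmt.Println(outOfDisk(sufficient, 100))   // false: 200 MB is above the 100 MB floor
    fmt.Println(outOfDisk(insufficient, 100)) // true: 50 MB is below the 100 MB floor
}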
@@ -2520,6 +2537,14 @@ func TestUpdateNewNodeStatus(t *testing.T) {
             LastHeartbeatTime:  unversioned.Time{},
             LastTransitionTime: unversioned.Time{},
         },
+        {
+            Type:               api.NodeOutOfDisk,
+            Status:             api.ConditionFalse,
+            Reason:             "KubeletHasSufficientDisk",
+            Message:            "kubelet has sufficient disk space available",
+            LastHeartbeatTime:  unversioned.Time{},
+            LastTransitionTime: unversioned.Time{},
+        },
     },
     NodeInfo: api.NodeSystemInfo{
         MachineID: "123",
@@ -2558,14 +2583,17 @@ func TestUpdateNewNodeStatus(t *testing.T) {
 if !ok {
     t.Errorf("unexpected object type")
 }
-if updatedNode.Status.Conditions[0].LastHeartbeatTime.IsZero() {
-    t.Errorf("unexpected zero last probe timestamp")
-}
-if updatedNode.Status.Conditions[0].LastTransitionTime.IsZero() {
-    t.Errorf("unexpected zero last transition timestamp")
-}
-updatedNode.Status.Conditions[0].LastHeartbeatTime = unversioned.Time{}
-updatedNode.Status.Conditions[0].LastTransitionTime = unversioned.Time{}
+for i, cond := range updatedNode.Status.Conditions {
+    if cond.LastHeartbeatTime.IsZero() {
+        t.Errorf("unexpected zero last probe timestamp for %v condition", cond.Type)
+    }
+    if cond.LastTransitionTime.IsZero() {
+        t.Errorf("unexpected zero last transition timestamp for %v condition", cond.Type)
+    }
+    updatedNode.Status.Conditions[i].LastHeartbeatTime = unversioned.Time{}
+    updatedNode.Status.Conditions[i].LastTransitionTime = unversioned.Time{}
+}
 if !reflect.DeepEqual(expectedNode, updatedNode) {
     t.Errorf("unexpected objects: %s", util.ObjectDiff(expectedNode, updatedNode))
 }
@@ -2589,6 +2617,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
             LastHeartbeatTime:  unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
             LastTransitionTime: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
         },
+        {
+            Type:               api.NodeOutOfDisk,
+            Status:             api.ConditionTrue,
+            Reason:             "KubeletOutOfDisk",
+            Message:            "out of disk space",
+            LastHeartbeatTime:  unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+            LastTransitionTime: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+        },
     },
     Capacity: api.ResourceList{
         api.ResourceCPU: *resource.NewMilliQuantity(3000, resource.DecimalSI),
@@ -2613,6 +2649,22 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
         DockerVersion: "1.5.0",
     }
     mockCadvisor.On("VersionInfo").Return(versionInfo, nil)
+
+    // Create a new DiskSpaceManager with a new policy. This new manager along with the mock FsInfo
+    // values added to Cadvisor should make the kubelet report that it is out of disk space.
+    dockerimagesFsInfo := cadvisorapiv2.FsInfo{Capacity: 500 * mb, Available: 70 * mb}
+    rootFsInfo := cadvisorapiv2.FsInfo{Capacity: 500 * mb, Available: 50 * mb}
+    mockCadvisor.On("DockerImagesFsInfo").Return(dockerimagesFsInfo, nil)
+    mockCadvisor.On("RootFsInfo").Return(rootFsInfo, nil)
+    dsp := DiskSpacePolicy{DockerFreeDiskMB: 100, RootFreeDiskMB: 100}
+    diskSpaceManager, err := newDiskSpaceManager(mockCadvisor, dsp)
+    if err != nil {
+        t.Fatalf("can't update disk space manager: %v", err)
+    }
+    diskSpaceManager.Unfreeze()
+    kubelet.diskSpaceManager = diskSpaceManager
+
 expectedNode := &api.Node{
     ObjectMeta: api.ObjectMeta{Name: testKubeletHostname},
     Spec:       api.NodeSpec{},
@@ -2626,6 +2678,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
             LastHeartbeatTime:  unversioned.Time{}, // placeholder
             LastTransitionTime: unversioned.Time{}, // placeholder
         },
+        {
+            Type:               api.NodeOutOfDisk,
+            Status:             api.ConditionTrue,
+            Reason:             "KubeletOutOfDisk",
+            Message:            "out of disk space",
+            LastHeartbeatTime:  unversioned.Time{}, // placeholder
+            LastTransitionTime: unversioned.Time{}, // placeholder
+        },
     },
     NodeInfo: api.NodeSystemInfo{
         MachineID: "123",
@@ -2665,16 +2725,18 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
 if !ok {
     t.Errorf("unexpected object type")
 }
-// Expect LastProbeTime to be updated to Now, while LastTransitionTime to be the same.
-if reflect.DeepEqual(updatedNode.Status.Conditions[0].LastHeartbeatTime.Rfc3339Copy().UTC(), unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC).Time) {
-    t.Errorf("expected \n%v\n, got \n%v", unversioned.Now(), unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC))
-}
-if !reflect.DeepEqual(updatedNode.Status.Conditions[0].LastTransitionTime.Rfc3339Copy().UTC(), unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC).Time) {
-    t.Errorf("expected \n%#v\n, got \n%#v", updatedNode.Status.Conditions[0].LastTransitionTime.Rfc3339Copy(),
-        unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC))
-}
-updatedNode.Status.Conditions[0].LastHeartbeatTime = unversioned.Time{}
-updatedNode.Status.Conditions[0].LastTransitionTime = unversioned.Time{}
+for i, cond := range updatedNode.Status.Conditions {
+    // Expect LastProbeTime to be updated to Now, while LastTransitionTime to be the same.
+    if old := unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC).Time; reflect.DeepEqual(cond.LastHeartbeatTime.Rfc3339Copy().UTC(), old) {
+        t.Errorf("Condition %v LastProbeTime: expected \n%v\n, got \n%v", cond.Type, unversioned.Now(), old)
+    }
+    if got, want := cond.LastTransitionTime.Rfc3339Copy().UTC(), unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC).Time; !reflect.DeepEqual(got, want) {
+        t.Errorf("Condition %v LastTransitionTime: expected \n%#v\n, got \n%#v", cond.Type, want, got)
+    }
+    updatedNode.Status.Conditions[i].LastHeartbeatTime = unversioned.Time{}
+    updatedNode.Status.Conditions[i].LastTransitionTime = unversioned.Time{}
+}
 if !reflect.DeepEqual(expectedNode, updatedNode) {
     t.Errorf("expected \n%v\n, got \n%v", expectedNode, updatedNode)
 }
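This assertion loop encodes the condition contract the kubelet change relies on: every status sync refreshes LastHeartbeatTime, while LastTransitionTime only moves when the condition's status actually flips. A minimal standalone model of that rule (names are illustrative, not the kubelet's):

package main

import (
    "fmt"
    "time"
)

type condition struct {
    Status             string
    LastHeartbeatTime  time.Time
    LastTransitionTime time.Time
}

// update refreshes the heartbeat on every sync and moves the transition
// time only on a status flip.
func update(c *condition, status string, now time.Time) {
    c.LastHeartbeatTime = now
    if c.Status != status {
        c.Status = status
        c.LastTransitionTime = now
    }
}

func main() {
    start := time.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC)
    c := condition{Status: "True", LastHeartbeatTime: start, LastTransitionTime: start}

    update(&c, "True", start.Add(time.Hour)) // same status: only the heartbeat moves
    fmt.Println(c.LastHeartbeatTime.After(start), c.LastTransitionTime.Equal(start)) // true true

    update(&c, "False", start.Add(2*time.Hour)) // status flip: both move
    fmt.Println(c.LastTransitionTime.After(start)) // true
}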
@@ -2708,6 +2770,22 @@ func TestUpdateNodeStatusWithoutContainerRuntime(t *testing.T) {
     }
     mockCadvisor.On("VersionInfo").Return(versionInfo, nil)
+
+    // Create a new DiskSpaceManager with a new policy. This new manager along with the
+    // mock FsInfo values assigned to Cadvisor should make the kubelet report that it has
+    // sufficient disk space.
+    dockerimagesFsInfo := cadvisorapiv2.FsInfo{Capacity: 500 * mb, Available: 200 * mb}
+    rootFsInfo := cadvisorapiv2.FsInfo{Capacity: 500 * mb, Available: 200 * mb}
+    mockCadvisor.On("DockerImagesFsInfo").Return(dockerimagesFsInfo, nil)
+    mockCadvisor.On("RootFsInfo").Return(rootFsInfo, nil)
+    dsp := DiskSpacePolicy{DockerFreeDiskMB: 100, RootFreeDiskMB: 100}
+    diskSpaceManager, err := newDiskSpaceManager(mockCadvisor, dsp)
+    if err != nil {
+        t.Fatalf("can't update disk space manager: %v", err)
+    }
+    diskSpaceManager.Unfreeze()
+    kubelet.diskSpaceManager = diskSpaceManager
+
 expectedNode := &api.Node{
     ObjectMeta: api.ObjectMeta{Name: testKubeletHostname},
     Spec:       api.NodeSpec{},
@@ -2721,6 +2799,14 @@ func TestUpdateNodeStatusWithoutContainerRuntime(t *testing.T) {
             LastHeartbeatTime:  unversioned.Time{},
             LastTransitionTime: unversioned.Time{},
         },
+        {
+            Type:               api.NodeOutOfDisk,
+            Status:             api.ConditionFalse,
+            Reason:             "KubeletHasSufficientDisk",
+            Message:            "kubelet has sufficient disk space available",
+            LastHeartbeatTime:  unversioned.Time{},
+            LastTransitionTime: unversioned.Time{},
+        },
     },
     NodeInfo: api.NodeSystemInfo{
         MachineID: "123",
@@ -2761,14 +2847,17 @@ func TestUpdateNodeStatusWithoutContainerRuntime(t *testing.T) {
     t.Errorf("unexpected action type. expected UpdateAction, got %#v", actions[1])
 }
-if updatedNode.Status.Conditions[0].LastHeartbeatTime.IsZero() {
-    t.Errorf("unexpected zero last probe timestamp")
-}
-if updatedNode.Status.Conditions[0].LastTransitionTime.IsZero() {
-    t.Errorf("unexpected zero last transition timestamp")
-}
-updatedNode.Status.Conditions[0].LastHeartbeatTime = unversioned.Time{}
-updatedNode.Status.Conditions[0].LastTransitionTime = unversioned.Time{}
+for i, cond := range updatedNode.Status.Conditions {
+    if cond.LastHeartbeatTime.IsZero() {
+        t.Errorf("unexpected zero last probe timestamp")
+    }
+    if cond.LastTransitionTime.IsZero() {
+        t.Errorf("unexpected zero last transition timestamp")
+    }
+    updatedNode.Status.Conditions[i].LastHeartbeatTime = unversioned.Time{}
+    updatedNode.Status.Conditions[i].LastTransitionTime = unversioned.Time{}
+}
 if !reflect.DeepEqual(expectedNode, updatedNode) {
     t.Errorf("unexpected objects: %s", util.ObjectDiff(expectedNode, updatedNode))
 }