From 01829432db07431689cfc0942a15e2dc58bf1fe6 Mon Sep 17 00:00:00 2001
From: Minhan Xia
Date: Tue, 24 Nov 2015 14:46:17 -0800
Subject: [PATCH] update pod status once node becomes NotReady

---
 pkg/controller/node/nodecontroller.go      |  40 ++++++
 pkg/controller/node/nodecontroller_test.go | 154 ++++++++++++++++++++-
 2 files changed, 193 insertions(+), 1 deletion(-)

diff --git a/pkg/controller/node/nodecontroller.go b/pkg/controller/node/nodecontroller.go
index 6e97c6c563a..bb38caa4e64 100644
--- a/pkg/controller/node/nodecontroller.go
+++ b/pkg/controller/node/nodecontroller.go
@@ -35,6 +35,7 @@ import (
 	"k8s.io/kubernetes/pkg/controller"
 	"k8s.io/kubernetes/pkg/controller/framework"
 	"k8s.io/kubernetes/pkg/fields"
+	"k8s.io/kubernetes/pkg/kubelet/util/format"
 	"k8s.io/kubernetes/pkg/runtime"
 	"k8s.io/kubernetes/pkg/types"
 	"k8s.io/kubernetes/pkg/util"
@@ -489,6 +490,9 @@ func (nc *NodeController) monitorNodeStatus() error {
 			// Report node event.
 			if readyCondition.Status != api.ConditionTrue && lastReadyCondition.Status == api.ConditionTrue {
 				nc.recordNodeStatusChange(node, "NodeNotReady")
+				if err = nc.markAllPodsNotReady(node.Name); err != nil {
+					util.HandleError(fmt.Errorf("Unable to mark all pods NotReady on node %v: %v", node.Name, err))
+				}
 			}
 
 			// Check with the cloud provider to see if the node still exists. If it
@@ -832,6 +836,42 @@ func (nc *NodeController) deletePods(nodeName string) (bool, error) {
 	return remaining, nil
 }
 
+// markAllPodsNotReady marks the existing Ready condition False on every pod bound to nodeName.
+// It returns nil on success, or one error aggregating every failed pod status update.
+func (nc *NodeController) markAllPodsNotReady(nodeName string) error {
+	glog.V(2).Infof("Update ready status of pods on node [%v]", nodeName)
+	opts := api.ListOptions{FieldSelector: fields.OneTermEqualSelector(client.PodHost, nodeName)}
+	pods, err := nc.kubeClient.Pods(api.NamespaceAll).List(opts)
+	if err != nil {
+		return err
+	}
+
+	errMsg := []string{}
+	for _, pod := range pods.Items {
+		// Defensive check, also needed for tests.
+		if pod.Spec.NodeName != nodeName {
+			continue
+		}
+
+		for i, cond := range pod.Status.Conditions {
+			if cond.Type == api.PodReady {
+				pod.Status.Conditions[i].Status = api.ConditionFalse
+				glog.V(2).Infof("Updating ready status of pod %v to false", pod.Name)
+				_, err := nc.kubeClient.Pods(pod.Namespace).UpdateStatus(&pod)
+				if err != nil {
+					glog.Warningf("Failed to update status for pod %q: %v", format.Pod(&pod), err)
+					errMsg = append(errMsg, fmt.Sprintf("%v", err))
+				}
+				break
+			}
+		}
+	}
+	if len(errMsg) == 0 {
+		return nil
+	}
+	return fmt.Errorf("%v", strings.Join(errMsg, "; "))
+}
+
 // terminatePods will ensure all pods on the given node that are in terminating state are eventually
 // cleaned up. Returns true if the node has no pods in terminating state, a duration that indicates how
 // long before we should check again (the next deadline for a pod to complete), or an error.
diff --git a/pkg/controller/node/nodecontroller_test.go b/pkg/controller/node/nodecontroller_test.go
index 614bbcaa042..1fdef327f80 100644
--- a/pkg/controller/node/nodecontroller_test.go
+++ b/pkg/controller/node/nodecontroller_test.go
@@ -595,6 +595,157 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 	}
 }
 
+func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
+	fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
+	table := []struct {
+		fakeNodeHandler         *FakeNodeHandler
+		timeToPass              time.Duration
+		newNodeStatus           api.NodeStatus
+		expectedPodStatusUpdate bool
+	}{
+		// Node created recently, without status.
+		// Expect no action from node controller (within startup grace period).
+		{
+			fakeNodeHandler: &FakeNodeHandler{
+				Existing: []*api.Node{
+					{
+						ObjectMeta: api.ObjectMeta{
+							Name:              "node0",
+							CreationTimestamp: fakeNow,
+						},
+					},
+				},
+				Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
+			},
+			expectedPodStatusUpdate: false,
+		},
+		// Node created long time ago, with status updated recently.
+		// Expect no action from node controller (within monitor grace period).
+		{
+			fakeNodeHandler: &FakeNodeHandler{
+				Existing: []*api.Node{
+					{
+						ObjectMeta: api.ObjectMeta{
+							Name:              "node0",
+							CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+						},
+						Status: api.NodeStatus{
+							Conditions: []api.NodeCondition{
+								{
+									Type:   api.NodeReady,
+									Status: api.ConditionTrue,
+									// Node status has just been updated.
+									LastHeartbeatTime:  fakeNow,
+									LastTransitionTime: fakeNow,
+								},
+							},
+							Capacity: api.ResourceList{
+								api.ResourceName(api.ResourceCPU):    resource.MustParse("10"),
+								api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
+							},
+						},
+						Spec: api.NodeSpec{
+							ExternalID: "node0",
+						},
+					},
+				},
+				Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
+			},
+			expectedPodStatusUpdate: false,
+		},
+		// Node created long time ago, with status updated by kubelet exceeds grace period.
+		// Expect pods status updated and Unknown node status posted from node controller
+		{
+			fakeNodeHandler: &FakeNodeHandler{
+				Existing: []*api.Node{
+					{
+						ObjectMeta: api.ObjectMeta{
+							Name:              "node0",
+							CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+						},
+						Status: api.NodeStatus{
+							Conditions: []api.NodeCondition{
+								{
+									Type:   api.NodeReady,
+									Status: api.ConditionTrue,
+									// Heartbeat recorded at fakeNow, the instant of the first monitor pass.
+									LastHeartbeatTime:  unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+									LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+								},
+								{
+									Type:   api.NodeOutOfDisk,
+									Status: api.ConditionFalse,
+									// Heartbeat recorded at fakeNow, the instant of the first monitor pass.
+									LastHeartbeatTime:  unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+									LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+								},
+							},
+							Capacity: api.ResourceList{
+								api.ResourceName(api.ResourceCPU):    resource.MustParse("10"),
+								api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
+							},
+						},
+						Spec: api.NodeSpec{
+							ExternalID: "node0",
+						},
+					},
+				},
+				Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
+			},
+			timeToPass: 1 * time.Minute,
+			newNodeStatus: api.NodeStatus{
+				Conditions: []api.NodeCondition{
+					{
+						Type:   api.NodeReady,
+						Status: api.ConditionTrue,
+						// Heartbeat still at fakeNow, i.e. timeToPass old on the second monitor pass.
+						LastHeartbeatTime:  unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+					},
+					{
+						Type:   api.NodeOutOfDisk,
+						Status: api.ConditionFalse,
+						// Heartbeat still at fakeNow, i.e. timeToPass old on the second monitor pass.
+						LastHeartbeatTime:  unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+						LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+					},
+				},
+				Capacity: api.ResourceList{
+					api.ResourceName(api.ResourceCPU):    resource.MustParse("10"),
+					api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
+				},
+			},
+			expectedPodStatusUpdate: true,
+		},
+	}
+
+	for i, item := range table {
+		nodeController := NewNodeController(nil, item.fakeNodeHandler, 5*time.Minute, util.NewFakeRateLimiter(),
+			util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false)
+		nodeController.now = func() unversioned.Time { return fakeNow }
+		if err := nodeController.monitorNodeStatus(); err != nil {
+			t.Errorf("Case[%d] unexpected error: %v", i, err)
+		}
+		if item.timeToPass > 0 {
+			nodeController.now = func() unversioned.Time { return unversioned.Time{Time: fakeNow.Add(item.timeToPass)} }
+			item.fakeNodeHandler.Existing[0].Status = item.newNodeStatus
+			if err := nodeController.monitorNodeStatus(); err != nil {
+				t.Errorf("Case[%d] unexpected error: %v", i, err)
+			}
+		}
+
+		podStatusUpdated := false
+		for _, action := range item.fakeNodeHandler.Actions() {
+			if action.GetVerb() == "update" && action.GetResource() == "pods" && action.GetSubresource() == "status" {
+				podStatusUpdated = true
+			}
+		}
+		if podStatusUpdated != item.expectedPodStatusUpdate {
+			t.Errorf("Case[%d] expect pod status updated to be %v, but got %v", i, item.expectedPodStatusUpdate, podStatusUpdated)
+		}
+	}
+}
+
 func TestNodeDeletion(t *testing.T) {
 	fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
 	fakeNodeHandler := &FakeNodeHandler{
@@ -831,7 +982,8 @@ func newNode(name string) *api.Node {
 }
 
 func newPod(name, host string) *api.Pod {
-	return &api.Pod{ObjectMeta: api.ObjectMeta{Name: name}, Spec: api.PodSpec{NodeName: host}}
+	return &api.Pod{ObjectMeta: api.ObjectMeta{Name: name}, Spec: api.PodSpec{NodeName: host},
+		Status: api.PodStatus{Conditions: []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue}}}}
 }
 
 func contains(node *api.Node, nodes []*api.Node) bool {