Merge pull request #17741 from freehan/nodedown

Auto commit by PR queue bot
This commit is contained in:
k8s-merge-robot 2016-01-15 00:02:48 -08:00
commit 0834890c42
2 changed files with 193 additions and 1 deletions

View File

@ -35,6 +35,7 @@ import (
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/framework"
"k8s.io/kubernetes/pkg/fields"
"k8s.io/kubernetes/pkg/kubelet/util/format"
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/types"
"k8s.io/kubernetes/pkg/util"
@ -489,6 +490,9 @@ func (nc *NodeController) monitorNodeStatus() error {
// Report node event.
if readyCondition.Status != api.ConditionTrue && lastReadyCondition.Status == api.ConditionTrue {
nc.recordNodeStatusChange(node, "NodeNotReady")
if err = nc.markAllPodsNotReady(node.Name); err != nil {
util.HandleError(fmt.Errorf("Unable to mark all pods NotReady on node %v: %v", node.Name, err))
}
}
// Check with the cloud provider to see if the node still exists. If it
@ -832,6 +836,42 @@ func (nc *NodeController) deletePods(nodeName string) (bool, error) {
return remaining, nil
}
// update ready status of all pods running on given node from master
// return true if success
func (nc *NodeController) markAllPodsNotReady(nodeName string) error {
glog.V(2).Infof("Update ready status of pods on node [%v]", nodeName)
opts := api.ListOptions{FieldSelector: fields.OneTermEqualSelector(client.PodHost, nodeName)}
pods, err := nc.kubeClient.Pods(api.NamespaceAll).List(opts)
if err != nil {
return err
}
errMsg := []string{}
for _, pod := range pods.Items {
// Defensive check, also needed for tests.
if pod.Spec.NodeName != nodeName {
continue
}
for i, cond := range pod.Status.Conditions {
if cond.Type == api.PodReady {
pod.Status.Conditions[i].Status = api.ConditionFalse
glog.V(2).Infof("Updating ready status of pod %v to false", pod.Name)
pod, err := nc.kubeClient.Pods(pod.Namespace).UpdateStatus(&pod)
if err != nil {
glog.Warningf("Failed to updated status for pod %q: %v", format.Pod(pod), err)
errMsg = append(errMsg, fmt.Sprintf("%v", err))
}
break
}
}
}
if len(errMsg) == 0 {
return nil
}
return fmt.Errorf("%v", strings.Join(errMsg, "; "))
}
// terminatePods will ensure all pods on the given node that are in terminating state are eventually
// cleaned up. Returns true if the node has no pods in terminating state, a duration that indicates how
// long before we should check again (the next deadline for a pod to complete), or an error.

View File

@ -595,6 +595,157 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
}
}
func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
table := []struct {
fakeNodeHandler *FakeNodeHandler
timeToPass time.Duration
newNodeStatus api.NodeStatus
expectedPodStatusUpdate bool
}{
// Node created recently, without status.
// Expect no action from node controller (within startup grace period).
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: fakeNow,
},
},
},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
expectedPodStatusUpdate: false,
},
// Node created long time ago, with status updated recently.
// Expect no action from node controller (within monitor grace period).
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
// Node status has just been updated.
LastHeartbeatTime: fakeNow,
LastTransitionTime: fakeNow,
},
},
Capacity: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
Spec: api.NodeSpec{
ExternalID: "node0",
},
},
},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
expectedPodStatusUpdate: false,
},
// Node created long time ago, with status updated by kubelet exceeds grace period.
// Expect pods status updated and Unknown node status posted from node controller
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
// Node status hasn't been updated for 1hr.
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
{
Type: api.NodeOutOfDisk,
Status: api.ConditionFalse,
// Node status hasn't been updated for 1hr.
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
Capacity: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
Spec: api.NodeSpec{
ExternalID: "node0",
},
},
},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
timeToPass: 1 * time.Minute,
newNodeStatus: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
// Node status hasn't been updated for 1hr.
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
{
Type: api.NodeOutOfDisk,
Status: api.ConditionFalse,
// Node status hasn't been updated for 1hr.
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
Capacity: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
expectedPodStatusUpdate: true,
},
}
for i, item := range table {
nodeController := NewNodeController(nil, item.fakeNodeHandler, 5*time.Minute, util.NewFakeRateLimiter(),
util.NewFakeRateLimiter(), testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false)
nodeController.now = func() unversioned.Time { return fakeNow }
if err := nodeController.monitorNodeStatus(); err != nil {
t.Errorf("Case[%d] unexpected error: %v", i, err)
}
if item.timeToPass > 0 {
nodeController.now = func() unversioned.Time { return unversioned.Time{Time: fakeNow.Add(item.timeToPass)} }
item.fakeNodeHandler.Existing[0].Status = item.newNodeStatus
if err := nodeController.monitorNodeStatus(); err != nil {
t.Errorf("Case[%d] unexpected error: %v", i, err)
}
}
podStatusUpdated := false
for _, action := range item.fakeNodeHandler.Actions() {
if action.GetVerb() == "update" && action.GetResource() == "pods" && action.GetSubresource() == "status" {
podStatusUpdated = true
}
}
if podStatusUpdated != item.expectedPodStatusUpdate {
t.Errorf("Case[%d] expect pod status updated to be %v, but got %v", i, item.expectedPodStatusUpdate, podStatusUpdated)
}
}
}
func TestNodeDeletion(t *testing.T) {
fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
fakeNodeHandler := &FakeNodeHandler{
@ -831,7 +982,8 @@ func newNode(name string) *api.Node {
}
func newPod(name, host string) *api.Pod {
return &api.Pod{ObjectMeta: api.ObjectMeta{Name: name}, Spec: api.PodSpec{NodeName: host}}
return &api.Pod{ObjectMeta: api.ObjectMeta{Name: name}, Spec: api.PodSpec{NodeName: host},
Status: api.PodStatus{Conditions: []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue}}}}
}
func contains(node *api.Node, nodes []*api.Node) bool {