NodeController doesn't evict Pods if no Nodes are Ready

gmarek 2016-05-16 11:20:23 +02:00
parent 1cba05574b
commit 6d27009db1
4 changed files with 373 additions and 58 deletions
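In short, monitorNodeStatus now tracks whether it has seen at least one Ready non-master Node during a pass. If it has seen none, it assumes the network is segmented, enters networkSegmentationMode, and clears all queued evictions; as soon as any healthy Node reappears, it resets the eviction timers and leaves segmentation mode. Condensed into a sketch (a paraphrase of the hunks below, not the literal diff):

seenReady := false
for i := range nodes.Items {
	node := &nodes.Items[i]
	// ... per-node status handling and eviction checks ...
	if observedReadyCondition.Status == api.ConditionTrue && !system.IsMasterNode(node) {
		seenReady = true
	}
}
if !seenReady {
	// No Ready Node observed: assume network segmentation, stop and clear all evictions.
	nc.networkSegmentationMode = true
	nc.stopAllPodEvictions()
} else if nc.networkSegmentationMode {
	// A healthy Node is back: give every Node a fresh eviction window, then resume normal operation.
	nc.forceUpdateAllProbeTimes()
	nc.networkSegmentationMode = false
}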


@@ -44,6 +44,7 @@ import (
"k8s.io/kubernetes/pkg/util/metrics"
utilruntime "k8s.io/kubernetes/pkg/util/runtime"
"k8s.io/kubernetes/pkg/util/sets"
"k8s.io/kubernetes/pkg/util/system"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/pkg/version"
"k8s.io/kubernetes/pkg/watch"
@@ -123,6 +124,11 @@ type NodeController struct {
forcefullyDeletePod func(*api.Pod) error
nodeExistsInCloudProvider func(string) (bool, error)
// If in network segmentation mode, NodeController won't evict Pods from unhealthy Nodes.
// It is enabled when all Nodes observed by the NodeController are NotReady, and disabled
// when NC sees any healthy Node. This is a temporary fix for v1.3.
networkSegmentationMode bool
}
// NewNodeController returns a new node controller to sync instances from cloudprovider.
@@ -141,10 +147,10 @@ func NewNodeController(
recorder := eventBroadcaster.NewRecorder(api.EventSource{Component: "controllermanager"})
eventBroadcaster.StartLogging(glog.Infof)
if kubeClient != nil {
glog.Infof("Sending events to api server.")
glog.V(0).Infof("Sending events to api server.")
eventBroadcaster.StartRecordingToSink(&unversionedcore.EventSinkImpl{Interface: kubeClient.Core().Events("")})
} else {
glog.Infof("No api server defined - no events will be sent to API server.")
glog.V(0).Infof("No api server defined - no events will be sent to API server.")
}
if kubeClient != nil && kubeClient.Core().GetRESTClient().GetRateLimiter() != nil {
@@ -282,7 +288,7 @@ func (nc *NodeController) Run(period time.Duration) {
}
if completed {
glog.Infof("All pods terminated on %s", value.Value)
glog.V(2).Infof("All pods terminated on %s", value.Value)
nc.recordNodeEvent(value.Value, api.EventTypeNormal, "TerminatedAllPods", fmt.Sprintf("Terminated all Pods on Node %s.", value.Value))
return true, 0
}
@@ -371,7 +377,7 @@ func (nc *NodeController) maybeDeleteTerminatingPod(obj interface{}) {
node := nodeObj.(*api.Node)
v, err := version.Parse(node.Status.NodeInfo.KubeletVersion)
if err != nil {
glog.Infof("couldn't parse verions %q of minion: %v", node.Status.NodeInfo.KubeletVersion, err)
glog.V(0).Infof("couldn't parse verions %q of minion: %v", node.Status.NodeInfo.KubeletVersion, err)
utilruntime.HandleError(nc.forcefullyDeletePod(pod))
return
}
@@ -407,7 +413,7 @@ func forcefullyDeletePod(c clientset.Interface, pod *api.Pod) error {
var zero int64
err := c.Core().Pods(pod.Namespace).Delete(pod.Name, &api.DeleteOptions{GracePeriodSeconds: &zero})
if err == nil {
glog.Infof("forceful deletion of %s succeeded", pod.Name)
glog.V(4).Infof("forceful deletion of %s succeeded", pod.Name)
}
return err
}
@@ -449,13 +455,14 @@ func (nc *NodeController) monitorNodeStatus() error {
// reduce lists/decouple this from monitoring status.
nc.reconcileNodeCIDRs(nodes)
}
seenReady := false
for i := range nodes.Items {
var gracePeriod time.Duration
var lastReadyCondition api.NodeCondition
var readyCondition *api.NodeCondition
var observedReadyCondition api.NodeCondition
var currentReadyCondition *api.NodeCondition
node := &nodes.Items[i]
for rep := 0; rep < nodeStatusUpdateRetry; rep++ {
gracePeriod, lastReadyCondition, readyCondition, err = nc.tryUpdateNodeStatus(node)
gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeStatus(node)
if err == nil {
break
}
@@ -474,28 +481,32 @@ func (nc *NodeController) monitorNodeStatus() error {
decisionTimestamp := nc.now()
if readyCondition != nil {
if currentReadyCondition != nil {
// Check eviction timeout against decisionTimestamp
if lastReadyCondition.Status == api.ConditionFalse &&
if observedReadyCondition.Status == api.ConditionFalse &&
decisionTimestamp.After(nc.nodeStatusMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
if nc.evictPods(node.Name) {
glog.V(4).Infof("Evicting pods on node %s: %v is later than %v + %v", node.Name, decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout)
}
}
if lastReadyCondition.Status == api.ConditionUnknown &&
if observedReadyCondition.Status == api.ConditionUnknown &&
decisionTimestamp.After(nc.nodeStatusMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout)) {
if nc.evictPods(node.Name) {
glog.V(4).Infof("Evicting pods on node %s: %v is later than %v + %v", node.Name, decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout-gracePeriod)
}
}
if lastReadyCondition.Status == api.ConditionTrue {
if observedReadyCondition.Status == api.ConditionTrue {
// We do not treat a master node as a part of the cluster for network segmentation checking.
if !system.IsMasterNode(node) {
seenReady = true
}
if nc.cancelPodEviction(node.Name) {
glog.V(2).Infof("Node %s is ready again, cancelled pod eviction", node.Name)
}
}
// Report node event.
if readyCondition.Status != api.ConditionTrue && lastReadyCondition.Status == api.ConditionTrue {
if currentReadyCondition.Status != api.ConditionTrue && observedReadyCondition.Status == api.ConditionTrue {
nc.recordNodeStatusChange(node, "NodeNotReady")
if err = nc.markAllPodsNotReady(node.Name); err != nil {
utilruntime.HandleError(fmt.Errorf("Unable to mark all pods NotReady on node %v: %v", node.Name, err))
@@ -504,14 +515,14 @@ func (nc *NodeController) monitorNodeStatus() error {
// Check with the cloud provider to see if the node still exists. If it
// doesn't, delete the node immediately.
if readyCondition.Status != api.ConditionTrue && nc.cloud != nil {
if currentReadyCondition.Status != api.ConditionTrue && nc.cloud != nil {
exists, err := nc.nodeExistsInCloudProvider(node.Name)
if err != nil {
glog.Errorf("Error determining if node %v exists in cloud: %v", node.Name, err)
continue
}
if !exists {
glog.Infof("Deleting node (no longer present in cloud provider): %s", node.Name)
glog.V(2).Infof("Deleting node (no longer present in cloud provider): %s", node.Name)
nc.recordNodeEvent(node.Name, api.EventTypeNormal, "DeletingNode", fmt.Sprintf("Deleting Node %v because it's not present according to cloud provider", node.Name))
go func(nodeName string) {
defer utilruntime.HandleCrash()
@@ -527,6 +538,18 @@ func (nc *NodeController) monitorNodeStatus() error {
}
}
}
// NC doesn't see any Ready Node. We assume that the network is segmented and Nodes cannot connect to the API server to
// update their statuses. NC enters network segmentation mode and cancels all evictions in progress.
if !seenReady {
nc.networkSegmentationMode = true
nc.stopAllPodEvictions()
} else {
if nc.networkSegmentationMode {
nc.forceUpdateAllProbeTimes()
nc.networkSegmentationMode = false
}
}
return nil
}
@@ -632,13 +655,13 @@ func (nc *NodeController) recordNodeStatusChange(node *api.Node, new_status stri
func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, api.NodeCondition, *api.NodeCondition, error) {
var err error
var gracePeriod time.Duration
var lastReadyCondition api.NodeCondition
readyCondition := nc.getCondition(&node.Status, api.NodeReady)
if readyCondition == nil {
var observedReadyCondition api.NodeCondition
currentReadyCondition := nc.getCondition(&node.Status, api.NodeReady)
if currentReadyCondition == nil {
// If ready condition is nil, then kubelet (or nodecontroller) never posted node status.
// A fake ready condition is created, where LastProbeTime and LastTransitionTime are set
// to node.CreationTimestamp to avoid handling the corner case.
lastReadyCondition = api.NodeCondition{
observedReadyCondition = api.NodeCondition{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: node.CreationTimestamp,
@@ -652,7 +675,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
}
} else {
// If ready condition is not nil, make a copy of it, since we may modify it in place later.
lastReadyCondition = *readyCondition
observedReadyCondition = *currentReadyCondition
gracePeriod = nc.nodeMonitorGracePeriod
}
@@ -683,7 +706,6 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
nc.nodeStatusMap[node.Name] = savedNodeStatus
} else if savedCondition == nil && observedCondition != nil {
glog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name)
savedNodeStatus = nodeStatusData{
@@ -691,7 +713,6 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
nc.nodeStatusMap[node.Name] = savedNodeStatus
} else if savedCondition != nil && observedCondition == nil {
glog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name)
// TODO: figure out what to do in this case. For now we do the same thing as above.
@@ -700,7 +721,6 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
nc.nodeStatusMap[node.Name] = savedNodeStatus
} else if savedCondition != nil && observedCondition != nil && savedCondition.LastHeartbeatTime != observedCondition.LastHeartbeatTime {
var transitionTime unversioned.Time
// If ReadyCondition changed since the last time we checked, we update the transition timestamp to "now",
@@ -713,7 +733,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
transitionTime = savedNodeStatus.readyTransitionTimestamp
}
if glog.V(5) {
glog.Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeStatus.status, node.Status)
glog.V(5).Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeStatus.status, node.Status)
} else {
glog.V(3).Infof("Node %s ReadyCondition updated. Updating timestamp.", node.Name)
}
@@ -722,13 +742,13 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
probeTimestamp: nc.now(),
readyTransitionTimestamp: transitionTime,
}
nc.nodeStatusMap[node.Name] = savedNodeStatus
}
nc.nodeStatusMap[node.Name] = savedNodeStatus
if nc.now().After(savedNodeStatus.probeTimestamp.Add(gracePeriod)) {
// NodeReady condition was last set longer ago than gracePeriod, so update it to Unknown
// (regardless of its current value) in the master.
if readyCondition == nil {
if currentReadyCondition == nil {
glog.V(2).Infof("node %v is never updated by kubelet", node.Name)
node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
Type: api.NodeReady,
@@ -740,14 +760,14 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
})
} else {
glog.V(4).Infof("node %v hasn't been updated for %+v. Last ready condition is: %+v",
node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), lastReadyCondition)
if lastReadyCondition.Status != api.ConditionUnknown {
readyCondition.Status = api.ConditionUnknown
readyCondition.Reason = "NodeStatusUnknown"
readyCondition.Message = fmt.Sprintf("Kubelet stopped posting node status.")
node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), observedReadyCondition)
if observedReadyCondition.Status != api.ConditionUnknown {
currentReadyCondition.Status = api.ConditionUnknown
currentReadyCondition.Reason = "NodeStatusUnknown"
currentReadyCondition.Message = fmt.Sprintf("Kubelet stopped posting node status.")
// LastProbeTime is the last time we heard from kubelet.
readyCondition.LastHeartbeatTime = lastReadyCondition.LastHeartbeatTime
readyCondition.LastTransitionTime = nc.now()
currentReadyCondition.LastHeartbeatTime = observedReadyCondition.LastHeartbeatTime
currentReadyCondition.LastTransitionTime = nc.now()
}
}
@@ -776,27 +796,41 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
}
}
if !api.Semantic.DeepEqual(nc.getCondition(&node.Status, api.NodeReady), &lastReadyCondition) {
if !api.Semantic.DeepEqual(nc.getCondition(&node.Status, api.NodeReady), &observedReadyCondition) {
if _, err = nc.kubeClient.Core().Nodes().UpdateStatus(node); err != nil {
glog.Errorf("Error updating node %s: %v", node.Name, err)
return gracePeriod, lastReadyCondition, readyCondition, err
return gracePeriod, observedReadyCondition, currentReadyCondition, err
} else {
nc.nodeStatusMap[node.Name] = nodeStatusData{
status: node.Status,
probeTimestamp: nc.nodeStatusMap[node.Name].probeTimestamp,
readyTransitionTimestamp: nc.now(),
}
return gracePeriod, lastReadyCondition, readyCondition, nil
return gracePeriod, observedReadyCondition, currentReadyCondition, nil
}
}
}
return gracePeriod, lastReadyCondition, readyCondition, err
return gracePeriod, observedReadyCondition, currentReadyCondition, err
}
// forceUpdateAllProbeTimes bumps all observed timestamps in saved nodeStatuses to now. This causes
// all eviction timers to reset.
func (nc *NodeController) forceUpdateAllProbeTimes() {
now := nc.now()
for k, v := range nc.nodeStatusMap {
v.probeTimestamp = now
v.readyTransitionTimestamp = now
nc.nodeStatusMap[k] = v
}
}
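Bumping both saved timestamps to now means every Node gets a fresh podEvictionTimeout (and status grace period) window measured from the moment segmentation ends, instead of being evicted immediately. A standalone illustration of that arithmetic, with a hypothetical timeout value rather than anything taken from this file:

package main

import (
	"fmt"
	"time"
)

func main() {
	now := time.Now()
	podEvictionTimeout := 5 * time.Minute // hypothetical value, for illustration only

	// forceUpdateAllProbeTimes resets the saved timestamps to "now"...
	readyTransitionTimestamp := now

	// ...so the eviction deadline moves a full podEvictionTimeout into the future
	// and the checks in monitorNodeStatus cannot fire again until it has elapsed.
	evictionDeadline := readyTransitionTimestamp.Add(podEvictionTimeout)
	fmt.Println("eviction due now?", now.After(evictionDeadline)) // false
	fmt.Println("earliest eviction decision:", evictionDeadline.Format(time.RFC3339))
}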
// evictPods queues an eviction for the provided node name, and returns false if the node is already
// queued for eviction.
func (nc *NodeController) evictPods(nodeName string) bool {
if nc.networkSegmentationMode {
return false
}
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
return nc.podEvictor.Add(nodeName)
@@ -816,6 +850,15 @@ func (nc *NodeController) cancelPodEviction(nodeName string) bool {
return false
}
// stopAllPodEvictions removes any queued evictions for all Nodes.
func (nc *NodeController) stopAllPodEvictions() {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
glog.V(3).Infof("Cancelling all pod evictions.")
nc.podEvictor.Clear()
nc.terminationEvictor.Clear()
}
// deletePods will delete all pods from master running on given node, and return true
// if any pods were deleted.
func (nc *NodeController) deletePods(nodeName string) (bool, error) {


@@ -165,13 +165,29 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
evictionTimeout := 10 * time.Minute
// Because of the logic that prevents NC from evicting anything when all Nodes are NotReady,
// we need a second healthy Node in tests. Because of how the tests are written, we need to update
// the status of this Node as well.
healthyNodeNewStatus := api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
// Node status has just been updated, and the Node remains Ready.
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 9, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
}
table := []struct {
fakeNodeHandler *FakeNodeHandler
daemonSets []extensions.DaemonSet
timeToPass time.Duration
newNodeStatus api.NodeStatus
expectedEvictPods bool
description string
fakeNodeHandler *FakeNodeHandler
daemonSets []extensions.DaemonSet
timeToPass time.Duration
newNodeStatus api.NodeStatus
secondNodeNewStatus api.NodeStatus
expectedEvictPods bool
description string
}{
// Node created recently, with no status (happens only at cluster startup).
{
@@ -183,14 +199,31 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
CreationTimestamp: fakeNow,
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node1",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
daemonSets: nil,
timeToPass: 0,
newNodeStatus: api.NodeStatus{},
expectedEvictPods: false,
description: "Node created recently, with no status.",
daemonSets: nil,
timeToPass: 0,
newNodeStatus: api.NodeStatus{},
secondNodeNewStatus: healthyNodeNewStatus,
expectedEvictPods: false,
description: "Node created recently, with no status.",
},
// Node created long time ago, and kubelet posted NotReady for a short period of time.
{
@@ -212,6 +245,22 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node1",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
@@ -228,8 +277,9 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
},
},
},
expectedEvictPods: false,
description: "Node created long time ago, and kubelet posted NotReady for a short period of time.",
secondNodeNewStatus: healthyNodeNewStatus,
expectedEvictPods: false,
description: "Node created long time ago, and kubelet posted NotReady for a short period of time.",
},
// Pod is ds-managed, and kubelet posted NotReady for a long period of time.
{
@@ -251,6 +301,22 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node1",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
Clientset: fake.NewSimpleClientset(
&api.PodList{
@@ -294,8 +360,9 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
},
},
},
expectedEvictPods: false,
description: "Pod is ds-managed, and kubelet posted NotReady for a long period of time.",
secondNodeNewStatus: healthyNodeNewStatus,
expectedEvictPods: false,
description: "Pod is ds-managed, and kubelet posted NotReady for a long period of time.",
},
// Node created long time ago, and kubelet posted NotReady for a long period of time.
{
@@ -317,6 +384,22 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node1",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
@@ -333,8 +416,9 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
},
},
},
expectedEvictPods: true,
description: "Node created long time ago, and kubelet posted NotReady for a long period of time.",
secondNodeNewStatus: healthyNodeNewStatus,
expectedEvictPods: true,
description: "Node created long time ago, and kubelet posted NotReady for a long period of time.",
},
// Node created long time ago, node controller posted Unknown for a short period of time.
{
@@ -356,6 +440,22 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node1",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
@@ -372,8 +472,9 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
},
},
},
expectedEvictPods: false,
description: "Node created long time ago, node controller posted Unknown for a short period of time.",
secondNodeNewStatus: healthyNodeNewStatus,
expectedEvictPods: false,
description: "Node created long time ago, node controller posted Unknown for a short period of time.",
},
// Node created long time ago, node controller posted Unknown for a long period of time.
{
@@ -395,6 +496,78 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node1",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
daemonSets: nil,
timeToPass: 60 * time.Minute,
newNodeStatus: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
// Node status was updated by nodecontroller 1hr ago
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
secondNodeNewStatus: healthyNodeNewStatus,
expectedEvictPods: true,
description: "Node created long time ago, node controller posted Unknown for a long period of time.",
},
// NetworkSegmentation: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node1",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
@@ -411,8 +584,76 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
},
},
},
expectedEvictPods: true,
description: "Node created long time ago, node controller posted Unknown for a long period of time.",
secondNodeNewStatus: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
// Node status was updated by nodecontroller 1hr ago
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
expectedEvictPods: false,
description: "Network Segmentation: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.",
},
// NetworkSegmentation: Node created long time ago, node controller posted Unknown for a long period
// of time on the first Node; eviction should stop even though the -master Node is healthy.
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node-master",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
daemonSets: nil,
timeToPass: 60 * time.Minute,
newNodeStatus: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
// Node status was updated by nodecontroller 1hr ago
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
secondNodeNewStatus: healthyNodeNewStatus,
expectedEvictPods: false,
description:         "NetworkSegmentation: Node created long time ago, node controller posted Unknown for a long period of time on the first Node; eviction should stop even though the -master Node is healthy",
},
}
@@ -430,6 +671,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
if item.timeToPass > 0 {
nodeController.now = func() unversioned.Time { return unversioned.Time{Time: fakeNow.Add(item.timeToPass)} }
item.fakeNodeHandler.Existing[0].Status = item.newNodeStatus
item.fakeNodeHandler.Existing[1].Status = item.secondNodeNewStatus
}
if err := nodeController.monitorNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)


@@ -133,6 +133,18 @@ func (q *UniqueQueue) Head() (TimedValue, bool) {
return *result, true
}
// Clear removes all items from the queue and the duplication-preventing set.
func (q *UniqueQueue) Clear() {
q.lock.Lock()
defer q.lock.Unlock()
if q.queue.Len() > 0 {
q.queue = make(TimedQueue, 0)
}
if len(q.set) > 0 {
q.set = sets.NewString()
}
}
// RateLimitedTimedQueue is a unique item priority queue ordered by the expected next time
// of execution. It is also rate limited.
type RateLimitedTimedQueue struct {
@@ -199,3 +211,8 @@ func (q *RateLimitedTimedQueue) Add(value string) bool {
func (q *RateLimitedTimedQueue) Remove(value string) bool {
return q.queue.Remove(value)
}
// Clear removes all items from the queue.
func (q *RateLimitedTimedQueue) Clear() {
q.queue.Clear()
}


@@ -261,3 +261,16 @@ func TestTryRemovingWhileTry(t *testing.T) {
t.Fatalf("unexpected iterations: %d", count)
}
}
func TestClear(t *testing.T) {
evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
evictor.Add("first")
evictor.Add("second")
evictor.Add("third")
evictor.Clear()
if len(evictor.queue.queue) != 0 {
t.Fatalf("Clear should remove all elements from the queue.")
}
}
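A natural follow-up check, sketched here under the assumption that the same test helpers as above are available (it is not part of this commit): Clear also resets the duplication-preventing set, so a value that was queued before can be added again afterwards.

func TestClearResetsSet(t *testing.T) {
	evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter())
	evictor.Add("first")
	evictor.Clear()
	// Clear emptied both the queue and the set, so the same value is accepted again.
	if !evictor.Add("first") {
		t.Fatalf("Add should succeed for a previously queued value after Clear.")
	}
	if len(evictor.queue.queue) != 1 {
		t.Fatalf("queue should contain exactly the re-added value, got %d items", len(evictor.queue.queue))
	}
}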