diff --git a/pkg/controller/node/nodecontroller.go b/pkg/controller/node/nodecontroller.go index af87c36883e..9f65749a42c 100644 --- a/pkg/controller/node/nodecontroller.go +++ b/pkg/controller/node/nodecontroller.go @@ -44,6 +44,7 @@ import ( "k8s.io/kubernetes/pkg/util/metrics" utilruntime "k8s.io/kubernetes/pkg/util/runtime" "k8s.io/kubernetes/pkg/util/sets" + "k8s.io/kubernetes/pkg/util/system" "k8s.io/kubernetes/pkg/util/wait" "k8s.io/kubernetes/pkg/version" "k8s.io/kubernetes/pkg/watch" @@ -123,6 +124,11 @@ type NodeController struct { forcefullyDeletePod func(*api.Pod) error nodeExistsInCloudProvider func(string) (bool, error) + + // If in network segmentation mode NodeController won't evict Pods from unhealthy Nodes. + // It is enabled when all Nodes observed by the NodeController are NotReady and disabled + // when NC sees any healthy Node. This is a temporary fix for v1.3. + networkSegmentationMode bool } // NewNodeController returns a new node controller to sync instances from cloudprovider. @@ -141,10 +147,10 @@ func NewNodeController( recorder := eventBroadcaster.NewRecorder(api.EventSource{Component: "controllermanager"}) eventBroadcaster.StartLogging(glog.Infof) if kubeClient != nil { - glog.Infof("Sending events to api server.") + glog.V(0).Infof("Sending events to api server.") eventBroadcaster.StartRecordingToSink(&unversionedcore.EventSinkImpl{Interface: kubeClient.Core().Events("")}) } else { - glog.Infof("No api server defined - no events will be sent to API server.") + glog.V(0).Infof("No api server defined - no events will be sent to API server.") } if kubeClient != nil && kubeClient.Core().GetRESTClient().GetRateLimiter() != nil { @@ -282,7 +288,7 @@ func (nc *NodeController) Run(period time.Duration) { } if completed { - glog.Infof("All pods terminated on %s", value.Value) + glog.V(2).Infof("All pods terminated on %s", value.Value) nc.recordNodeEvent(value.Value, api.EventTypeNormal, "TerminatedAllPods", fmt.Sprintf("Terminated all Pods on Node %s.", value.Value)) return true, 0 } @@ -371,7 +377,7 @@ func (nc *NodeController) maybeDeleteTerminatingPod(obj interface{}) { node := nodeObj.(*api.Node) v, err := version.Parse(node.Status.NodeInfo.KubeletVersion) if err != nil { - glog.Infof("couldn't parse verions %q of minion: %v", node.Status.NodeInfo.KubeletVersion, err) + glog.V(0).Infof("couldn't parse verions %q of minion: %v", node.Status.NodeInfo.KubeletVersion, err) utilruntime.HandleError(nc.forcefullyDeletePod(pod)) return } @@ -407,7 +413,7 @@ func forcefullyDeletePod(c clientset.Interface, pod *api.Pod) error { var zero int64 err := c.Core().Pods(pod.Namespace).Delete(pod.Name, &api.DeleteOptions{GracePeriodSeconds: &zero}) if err == nil { - glog.Infof("forceful deletion of %s succeeded", pod.Name) + glog.V(4).Infof("forceful deletion of %s succeeded", pod.Name) } return err } @@ -449,13 +455,14 @@ func (nc *NodeController) monitorNodeStatus() error { // reduce lists/decouple this from monitoring status. 
nc.reconcileNodeCIDRs(nodes) } + seenReady := false for i := range nodes.Items { var gracePeriod time.Duration - var lastReadyCondition api.NodeCondition - var readyCondition *api.NodeCondition + var observedReadyCondition api.NodeCondition + var currentReadyCondition *api.NodeCondition node := &nodes.Items[i] for rep := 0; rep < nodeStatusUpdateRetry; rep++ { - gracePeriod, lastReadyCondition, readyCondition, err = nc.tryUpdateNodeStatus(node) + gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeStatus(node) if err == nil { break } @@ -474,28 +481,32 @@ func (nc *NodeController) monitorNodeStatus() error { decisionTimestamp := nc.now() - if readyCondition != nil { + if currentReadyCondition != nil { // Check eviction timeout against decisionTimestamp - if lastReadyCondition.Status == api.ConditionFalse && + if observedReadyCondition.Status == api.ConditionFalse && decisionTimestamp.After(nc.nodeStatusMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) { if nc.evictPods(node.Name) { glog.V(4).Infof("Evicting pods on node %s: %v is later than %v + %v", node.Name, decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout) } } - if lastReadyCondition.Status == api.ConditionUnknown && + if observedReadyCondition.Status == api.ConditionUnknown && decisionTimestamp.After(nc.nodeStatusMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout)) { if nc.evictPods(node.Name) { glog.V(4).Infof("Evicting pods on node %s: %v is later than %v + %v", node.Name, decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout-gracePeriod) } } - if lastReadyCondition.Status == api.ConditionTrue { + if observedReadyCondition.Status == api.ConditionTrue { + // We do not treat a master node as a part of the cluster for network segmentation checking. + if !system.IsMasterNode(node) { + seenReady = true + } if nc.cancelPodEviction(node.Name) { glog.V(2).Infof("Node %s is ready again, cancelled pod eviction", node.Name) } } // Report node event. - if readyCondition.Status != api.ConditionTrue && lastReadyCondition.Status == api.ConditionTrue { + if currentReadyCondition.Status != api.ConditionTrue && observedReadyCondition.Status == api.ConditionTrue { nc.recordNodeStatusChange(node, "NodeNotReady") if err = nc.markAllPodsNotReady(node.Name); err != nil { utilruntime.HandleError(fmt.Errorf("Unable to mark all pods NotReady on node %v: %v", node.Name, err)) @@ -504,14 +515,14 @@ func (nc *NodeController) monitorNodeStatus() error { // Check with the cloud provider to see if the node still exists. If it // doesn't, delete the node immediately. - if readyCondition.Status != api.ConditionTrue && nc.cloud != nil { + if currentReadyCondition.Status != api.ConditionTrue && nc.cloud != nil { exists, err := nc.nodeExistsInCloudProvider(node.Name) if err != nil { glog.Errorf("Error determining if node %v exists in cloud: %v", node.Name, err) continue } if !exists { - glog.Infof("Deleting node (no longer present in cloud provider): %s", node.Name) + glog.V(2).Infof("Deleting node (no longer present in cloud provider): %s", node.Name) nc.recordNodeEvent(node.Name, api.EventTypeNormal, "DeletingNode", fmt.Sprintf("Deleting Node %v because it's not present according to cloud provider", node.Name)) go func(nodeName string) { defer utilruntime.HandleCrash() @@ -527,6 +538,18 @@ func (nc *NodeController) monitorNodeStatus() error { } } } + + // NC doesn't see any Ready Node.
We assume that the network is segmented and Nodes cannot connect to the API server and + // update their statuses. NC enters network segmentation mode and cancels all evictions in progress. + if !seenReady { + nc.networkSegmentationMode = true + nc.stopAllPodEvictions() + } else { + if nc.networkSegmentationMode { + nc.forceUpdateAllProbeTimes() + nc.networkSegmentationMode = false + } + } return nil } @@ -632,13 +655,13 @@ func (nc *NodeController) recordNodeStatusChange(node *api.Node, new_status stri func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, api.NodeCondition, *api.NodeCondition, error) { var err error var gracePeriod time.Duration - var lastReadyCondition api.NodeCondition - readyCondition := nc.getCondition(&node.Status, api.NodeReady) - if readyCondition == nil { + var observedReadyCondition api.NodeCondition + currentReadyCondition := nc.getCondition(&node.Status, api.NodeReady) + if currentReadyCondition == nil { // If ready condition is nil, then kubelet (or nodecontroller) never posted node status. // A fake ready condition is created, where LastProbeTime and LastTransitionTime is set // to node.CreationTimestamp to avoid handle the corner case. - lastReadyCondition = api.NodeCondition{ + observedReadyCondition = api.NodeCondition{ Type: api.NodeReady, Status: api.ConditionUnknown, LastHeartbeatTime: node.CreationTimestamp, @@ -652,7 +675,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap } } else { // If ready condition is not nil, make a copy of it, since we may modify it in place later. - lastReadyCondition = *readyCondition + observedReadyCondition = *currentReadyCondition gracePeriod = nc.nodeMonitorGracePeriod } @@ -683,7 +706,6 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap probeTimestamp: nc.now(), readyTransitionTimestamp: nc.now(), } - nc.nodeStatusMap[node.Name] = savedNodeStatus } else if savedCondition == nil && observedCondition != nil { glog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name) savedNodeStatus = nodeStatusData{ probeTimestamp: nc.now(), readyTransitionTimestamp: nc.now(), } - nc.nodeStatusMap[node.Name] = savedNodeStatus } else if savedCondition != nil && observedCondition == nil { glog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name) // TODO: figure out what to do in this case. For now we do the same thing as above. savedNodeStatus = nodeStatusData{ probeTimestamp: nc.now(), readyTransitionTimestamp: nc.now(), } - nc.nodeStatusMap[node.Name] = savedNodeStatus } else if savedCondition != nil && observedCondition != nil && savedCondition.LastHeartbeatTime != observedCondition.LastHeartbeatTime { var transitionTime unversioned.Time // If ReadyCondition changed since the last time we checked, we update the transition timestamp to "now", @@ -713,7 +733,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap transitionTime = savedNodeStatus.readyTransitionTimestamp } if glog.V(5) { - glog.Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeStatus.status, node.Status) + glog.V(5).Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeStatus.status, node.Status) } else { glog.V(3).Infof("Node %s ReadyCondition updated.
Updating timestamp.", node.Name) } @@ -722,13 +742,13 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap probeTimestamp: nc.now(), readyTransitionTimestamp: transitionTime, } - nc.nodeStatusMap[node.Name] = savedNodeStatus } + nc.nodeStatusMap[node.Name] = savedNodeStatus if nc.now().After(savedNodeStatus.probeTimestamp.Add(gracePeriod)) { // NodeReady condition was last set longer ago than gracePeriod, so update it to Unknown // (regardless of its current value) in the master. - if readyCondition == nil { + if currentReadyCondition == nil { glog.V(2).Infof("node %v is never updated by kubelet", node.Name) node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{ Type: api.NodeReady, @@ -740,14 +760,14 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap }) } else { glog.V(4).Infof("node %v hasn't been updated for %+v. Last ready condition is: %+v", - node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), lastReadyCondition) - if lastReadyCondition.Status != api.ConditionUnknown { - readyCondition.Status = api.ConditionUnknown - readyCondition.Reason = "NodeStatusUnknown" - readyCondition.Message = fmt.Sprintf("Kubelet stopped posting node status.") + node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), observedReadyCondition) + if observedReadyCondition.Status != api.ConditionUnknown { + currentReadyCondition.Status = api.ConditionUnknown + currentReadyCondition.Reason = "NodeStatusUnknown" + currentReadyCondition.Message = fmt.Sprintf("Kubelet stopped posting node status.") // LastProbeTime is the last time we heard from kubelet. - readyCondition.LastHeartbeatTime = lastReadyCondition.LastHeartbeatTime - readyCondition.LastTransitionTime = nc.now() + currentReadyCondition.LastHeartbeatTime = observedReadyCondition.LastHeartbeatTime + currentReadyCondition.LastTransitionTime = nc.now() } } @@ -776,27 +796,41 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap } } - if !api.Semantic.DeepEqual(nc.getCondition(&node.Status, api.NodeReady), &lastReadyCondition) { + if !api.Semantic.DeepEqual(nc.getCondition(&node.Status, api.NodeReady), &observedReadyCondition) { if _, err = nc.kubeClient.Core().Nodes().UpdateStatus(node); err != nil { glog.Errorf("Error updating node %s: %v", node.Name, err) - return gracePeriod, lastReadyCondition, readyCondition, err + return gracePeriod, observedReadyCondition, currentReadyCondition, err } else { nc.nodeStatusMap[node.Name] = nodeStatusData{ status: node.Status, probeTimestamp: nc.nodeStatusMap[node.Name].probeTimestamp, readyTransitionTimestamp: nc.now(), } - return gracePeriod, lastReadyCondition, readyCondition, nil + return gracePeriod, observedReadyCondition, currentReadyCondition, nil } } } - return gracePeriod, lastReadyCondition, readyCondition, err + return gracePeriod, observedReadyCondition, currentReadyCondition, err +} + +// forceUpdateAllProbeTimes bumps all observed timestamps in saved nodeStatuses to now. This causes +// all eviction timers to be reset. +func (nc *NodeController) forceUpdateAllProbeTimes() { + now := nc.now() + for k, v := range nc.nodeStatusMap { + v.probeTimestamp = now + v.readyTransitionTimestamp = now + nc.nodeStatusMap[k] = v + } } // evictPods queues an eviction for the provided node name, and returns false if the node is already // queued for eviction.
func (nc *NodeController) evictPods(nodeName string) bool { + if nc.networkSegmentationMode { + return false + } nc.evictorLock.Lock() defer nc.evictorLock.Unlock() return nc.podEvictor.Add(nodeName) @@ -816,6 +850,15 @@ func (nc *NodeController) cancelPodEviction(nodeName string) bool { return false } +// stopAllPodEvictions removes any queued evictions for all Nodes. +func (nc *NodeController) stopAllPodEvictions() { + nc.evictorLock.Lock() + defer nc.evictorLock.Unlock() + glog.V(3).Infof("Cancelling all pod evictions.") + nc.podEvictor.Clear() + nc.terminationEvictor.Clear() +} + // deletePods will delete all pods from master running on given node, and return true // if any pods were deleted. func (nc *NodeController) deletePods(nodeName string) (bool, error) { diff --git a/pkg/controller/node/nodecontroller_test.go b/pkg/controller/node/nodecontroller_test.go index 4f43ac6aff1..07d0d436370 100644 --- a/pkg/controller/node/nodecontroller_test.go +++ b/pkg/controller/node/nodecontroller_test.go @@ -165,13 +165,29 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC) evictionTimeout := 10 * time.Minute + // Because of the logic that prevents NC from evicting anything when all Nodes are NotReady + // we need a second healthy node in tests. Because of how the tests are written, we need to update + // the status of this Node. + healthyNodeNewStatus := api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionTrue, + // Node status has just been updated. + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 9, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + } + table := []struct { - fakeNodeHandler *FakeNodeHandler - daemonSets []extensions.DaemonSet - timeToPass time.Duration - newNodeStatus api.NodeStatus - expectedEvictPods bool - description string + fakeNodeHandler *FakeNodeHandler + daemonSets []extensions.DaemonSet + timeToPass time.Duration + newNodeStatus api.NodeStatus + secondNodeNewStatus api.NodeStatus + expectedEvictPods bool + description string }{ // Node created recently, with no status (happens only at cluster startup). { @@ -183,14 +199,31 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { CreationTimestamp: fakeNow, }, }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node1", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionTrue, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, }, Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}), }, - daemonSets: nil, - timeToPass: 0, - newNodeStatus: api.NodeStatus{}, - expectedEvictPods: false, - description: "Node created recently, with no status.", + daemonSets: nil, + timeToPass: 0, + newNodeStatus: api.NodeStatus{}, + secondNodeNewStatus: healthyNodeNewStatus, + expectedEvictPods: false, + description: "Node created recently, with no status.", }, // Node created long time ago, and kubelet posted NotReady for a short period of time.
{ @@ -212,6 +245,22 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { }, }, }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node1", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionTrue, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, }, Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}), }, @@ -228,8 +277,9 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { }, }, }, - expectedEvictPods: false, - description: "Node created long time ago, and kubelet posted NotReady for a short period of time.", + secondNodeNewStatus: healthyNodeNewStatus, + expectedEvictPods: false, + description: "Node created long time ago, and kubelet posted NotReady for a short period of time.", }, // Pod is ds-managed, and kubelet posted NotReady for a long period of time. { @@ -251,6 +301,22 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { }, }, }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node1", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionTrue, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, }, Clientset: fake.NewSimpleClientset( &api.PodList{ @@ -294,8 +360,9 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { }, }, }, - expectedEvictPods: false, - description: "Pod is ds-managed, and kubelet posted NotReady for a long period of time.", + secondNodeNewStatus: healthyNodeNewStatus, + expectedEvictPods: false, + description: "Pod is ds-managed, and kubelet posted NotReady for a long period of time.", }, // Node created long time ago, and kubelet posted NotReady for a long period of time. { @@ -317,6 +384,22 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { }, }, }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node1", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionTrue, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, }, Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}), }, @@ -333,8 +416,9 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { }, }, }, - expectedEvictPods: true, - description: "Node created long time ago, and kubelet posted NotReady for a long period of time.", + secondNodeNewStatus: healthyNodeNewStatus, + expectedEvictPods: true, + description: "Node created long time ago, and kubelet posted NotReady for a long period of time.", }, // Node created long time ago, node controller posted Unknown for a short period of time. 
{ @@ -356,6 +440,22 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { }, }, }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node1", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionTrue, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, }, Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}), }, @@ -372,8 +472,9 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { }, }, }, - expectedEvictPods: false, - description: "Node created long time ago, node controller posted Unknown for a short period of time.", + secondNodeNewStatus: healthyNodeNewStatus, + expectedEvictPods: false, + description: "Node created long time ago, node controller posted Unknown for a short period of time.", }, // Node created long time ago, node controller posted Unknown for a long period of time. { @@ -395,6 +496,78 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { }, }, }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node1", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionTrue, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}), + }, + daemonSets: nil, + timeToPass: 60 * time.Minute, + newNodeStatus: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + // Node status was updated by nodecontroller 1hr ago + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + secondNodeNewStatus: healthyNodeNewStatus, + expectedEvictPods: true, + description: "Node created long time ago, node controller posted Unknown for a long period of time.", + }, + // NetworkSegmentation: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes. 
+ { + fakeNodeHandler: &FakeNodeHandler{ + Existing: []*api.Node{ + { + ObjectMeta: api.ObjectMeta{ + Name: "node0", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node1", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, }, Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}), }, @@ -411,8 +584,76 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { }, }, }, - expectedEvictPods: true, - description: "Node created long time ago, node controller posted Unknown for a long period of time.", + secondNodeNewStatus: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + // Node status was updated by nodecontroller 1hr ago + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + expectedEvictPods: false, + description: "Network Segmentation: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.", + }, + // NetworkSegmentation: Node created long time ago, node controller posted Unknown for a long period + // of time on the first Node, eviction should stop even though the master Node is healthy.
+ { + fakeNodeHandler: &FakeNodeHandler{ + Existing: []*api.Node{ + { + ObjectMeta: api.ObjectMeta{ + Name: "node0", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node-master", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionTrue, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + }, + Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}), + }, + daemonSets: nil, + timeToPass: 60 * time.Minute, + newNodeStatus: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + // Node status was updated by nodecontroller 1hr ago + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + secondNodeNewStatus: healthyNodeNewStatus, + expectedEvictPods: false, + description: "NetworkSegmentation: Node created long time ago, node controller posted Unknown for a long period of time on the first Node, eviction should stop even though the master Node is healthy", }, } @@ -430,6 +671,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { if item.timeToPass > 0 { nodeController.now = func() unversioned.Time { return unversioned.Time{Time: fakeNow.Add(item.timeToPass)} } item.fakeNodeHandler.Existing[0].Status = item.newNodeStatus + item.fakeNodeHandler.Existing[1].Status = item.secondNodeNewStatus } if err := nodeController.monitorNodeStatus(); err != nil { t.Errorf("unexpected error: %v", err) diff --git a/pkg/controller/node/rate_limited_queue.go b/pkg/controller/node/rate_limited_queue.go index a3f00756556..a2865418fb7 100644 --- a/pkg/controller/node/rate_limited_queue.go +++ b/pkg/controller/node/rate_limited_queue.go @@ -133,6 +133,18 @@ func (q *UniqueQueue) Head() (TimedValue, bool) { return *result, true } +// Clear removes all items from the queue and the duplication-preventing set. +func (q *UniqueQueue) Clear() { + q.lock.Lock() + defer q.lock.Unlock() + if q.queue.Len() > 0 { + q.queue = make(TimedQueue, 0) + } + if len(q.set) > 0 { + q.set = sets.NewString() + } +} + // RateLimitedTimedQueue is a unique item priority queue ordered by the expected next time // of execution. It is also rate limited.
type RateLimitedTimedQueue struct { @@ -199,3 +211,8 @@ func (q *RateLimitedTimedQueue) Add(value string) bool { func (q *RateLimitedTimedQueue) Remove(value string) bool { return q.queue.Remove(value) } + +// Clear removes all items from the queue. +func (q *RateLimitedTimedQueue) Clear() { + q.queue.Clear() +} diff --git a/pkg/controller/node/rate_limited_queue_test.go b/pkg/controller/node/rate_limited_queue_test.go index e6ed0bd12bf..854b75e9b18 100644 --- a/pkg/controller/node/rate_limited_queue_test.go +++ b/pkg/controller/node/rate_limited_queue_test.go @@ -261,3 +261,16 @@ func TestTryRemovingWhileTry(t *testing.T) { t.Fatalf("unexpected iterations: %d", count) } } + +func TestClear(t *testing.T) { + evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter()) + evictor.Add("first") + evictor.Add("second") + evictor.Add("third") + + evictor.Clear() + + if len(evictor.queue.queue) != 0 { + t.Fatalf("Clear should remove all elements from the queue.") + } +}
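Reviewer note (not part of the patch): below is a minimal standalone sketch of the segmentation-mode decision this change adds to monitorNodeStatus. The controller, node, probeTimestamps, and evictionQueue names are simplified stand-ins invented for illustration; the real code uses NodeController, api.Node, nodeStatusMap, system.IsMasterNode, and the RateLimitedTimedQueue evictors.

// Illustrative sketch only, mirroring the seenReady bookkeeping described above.
package main

import (
	"fmt"
	"time"
)

type node struct {
	name     string
	ready    bool
	isMaster bool
}

type controller struct {
	networkSegmentationMode bool
	probeTimestamps         map[string]time.Time // stand-in for nodeStatusMap probe times
	evictionQueue           map[string]bool      // stand-in for the pod eviction queue
}

// monitor walks the observed nodes and toggles network segmentation mode.
func (c *controller) monitor(nodes []node, now time.Time) {
	seenReady := false
	for _, n := range nodes {
		// Master nodes do not count when deciding whether the cluster is segmented.
		if n.ready && !n.isMaster {
			seenReady = true
		}
	}
	if !seenReady {
		// No healthy non-master node: assume segmentation and cancel queued evictions.
		c.networkSegmentationMode = true
		c.evictionQueue = map[string]bool{}
		return
	}
	if c.networkSegmentationMode {
		// Leaving segmentation mode: reset probe timestamps so eviction timers start
		// counting from now instead of firing immediately for still-unhealthy nodes.
		for name := range c.probeTimestamps {
			c.probeTimestamps[name] = now
		}
		c.networkSegmentationMode = false
	}
}

func main() {
	c := &controller{
		probeTimestamps: map[string]time.Time{"node0": time.Now().Add(-time.Hour)},
		evictionQueue:   map[string]bool{"node0": true},
	}
	c.monitor([]node{
		{name: "node0", ready: false},
		{name: "kubernetes-master", ready: true, isMaster: true},
	}, time.Now())
	fmt.Println("segmented:", c.networkSegmentationMode, "queued evictions:", len(c.evictionQueue))
	// Prints: segmented: true queued evictions: 0
}

The sketch shows why the master node is excluded from the seenReady check: a reachable master alone should not keep evictions running when every worker looks NotReady, which is exactly the case exercised by the "-master Node is healthy" test above.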