diff --git a/pkg/controller/node/controller_utils.go b/pkg/controller/node/controller_utils.go index 215b811cf05..19abd03715c 100644 --- a/pkg/controller/node/controller_utils.go +++ b/pkg/controller/node/controller_utils.go @@ -35,18 +35,34 @@ import ( "github.com/golang/glog" ) +const ( + // Number of Nodes that need to be in the cluster for it to be treated as "large" + LargeClusterThreshold = 20 +) + // This function is expected to get a slice of NodeReadyConditions for all Nodes in a given zone. +// The zone is considered: +// - fullyDisrupted if there are no Ready Nodes, +// - partiallyDisrupted if more than 1/3 of Nodes (at least 3) are not Ready, +// - normal otherwise func ComputeZoneState(nodeReadyConditions []*api.NodeCondition) zoneState { - seenReady := false + readyNodes := 0 + notReadyNodes := 0 for i := range nodeReadyConditions { if nodeReadyConditions[i] != nil && nodeReadyConditions[i].Status == api.ConditionTrue { - seenReady = true + readyNodes++ + } else { + notReadyNodes++ } } - if seenReady { + switch { + case readyNodes == 0 && notReadyNodes > 0: + return stateFullDisruption + case notReadyNodes > 2 && 2*notReadyNodes > readyNodes: + return statePartialDisruption + default: return stateNormal } - return stateFullSegmentation } // cleanupOrphanedPods deletes pods that are bound to nodes that don't @@ -320,3 +336,15 @@ func terminatePods(kubeClient clientset.Interface, recorder record.EventRecorder } return complete, nextAttempt, nil } + +func HealthyQPSFunc(nodeNum int, defaultQPS float32) float32 { + return defaultQPS +} + +// If the cluster is large, make evictions slower; if it is small, stop evictions altogether. +func ReducedQPSFunc(nodeNum int, defaultQPS float32) float32 { + if nodeNum > LargeClusterThreshold { + return defaultQPS / 10 + } + return 0 +} diff --git a/pkg/controller/node/nodecontroller.go b/pkg/controller/node/nodecontroller.go index 153f5a52998..5c20a1c6dcc 100644 --- a/pkg/controller/node/nodecontroller.go +++ b/pkg/controller/node/nodecontroller.go @@ -67,9 +67,10 @@ const ( type zoneState string const ( - stateNormal = zoneState("Normal") - stateFullSegmentation = zoneState("FullSegmentation") - statePartialSegmentation = zoneState("PartialSegmentation") + stateInitial = zoneState("Initial") + stateNormal = zoneState("Normal") + stateFullDisruption = zoneState("FullDisruption") + statePartialDisruption = zoneState("PartialDisruption") ) type nodeStatusData struct { @@ -136,9 +137,11 @@ type NodeController struct { // allocate/recycle CIDRs for node if allocateNodeCIDRs == true cidrAllocator CIDRAllocator - forcefullyDeletePod func(*api.Pod) error - nodeExistsInCloudProvider func(string) (bool, error) - computeZoneStateFunc func(nodeConditions []*api.NodeCondition) zoneState + forcefullyDeletePod func(*api.Pod) error + nodeExistsInCloudProvider func(string) (bool, error) + computeZoneStateFunc func(nodeConditions []*api.NodeCondition) zoneState + enterPartialDisruptionFunc func(nodeNum int, defaultQPS float32) float32 + enterFullDisruptionFunc func(nodeNum int, defaultQPS float32) float32 zoneStates map[string]zoneState @@ -192,28 +195,30 @@ func NewNodeController( } nc := &NodeController{ - cloud: cloud, - knownNodeSet: make(map[string]*api.Node), - kubeClient: kubeClient, - recorder: recorder, - podEvictionTimeout: podEvictionTimeout, - maximumGracePeriod: 5 * time.Minute, - zonePodEvictor: make(map[string]*RateLimitedTimedQueue), - zoneTerminationEvictor: make(map[string]*RateLimitedTimedQueue), - nodeStatusMap: 
make(map[string]nodeStatusData), - nodeMonitorGracePeriod: nodeMonitorGracePeriod, - nodeMonitorPeriod: nodeMonitorPeriod, - nodeStartupGracePeriod: nodeStartupGracePeriod, - lookupIP: net.LookupIP, - now: unversioned.Now, - clusterCIDR: clusterCIDR, - serviceCIDR: serviceCIDR, - allocateNodeCIDRs: allocateNodeCIDRs, - forcefullyDeletePod: func(p *api.Pod) error { return forcefullyDeletePod(kubeClient, p) }, - nodeExistsInCloudProvider: func(nodeName string) (bool, error) { return nodeExistsInCloudProvider(cloud, nodeName) }, - computeZoneStateFunc: ComputeZoneState, - evictionLimiterQPS: evictionLimiterQPS, - zoneStates: make(map[string]zoneState), + cloud: cloud, + knownNodeSet: make(map[string]*api.Node), + kubeClient: kubeClient, + recorder: recorder, + podEvictionTimeout: podEvictionTimeout, + maximumGracePeriod: 5 * time.Minute, + zonePodEvictor: make(map[string]*RateLimitedTimedQueue), + zoneTerminationEvictor: make(map[string]*RateLimitedTimedQueue), + nodeStatusMap: make(map[string]nodeStatusData), + nodeMonitorGracePeriod: nodeMonitorGracePeriod, + nodeMonitorPeriod: nodeMonitorPeriod, + nodeStartupGracePeriod: nodeStartupGracePeriod, + lookupIP: net.LookupIP, + now: unversioned.Now, + clusterCIDR: clusterCIDR, + serviceCIDR: serviceCIDR, + allocateNodeCIDRs: allocateNodeCIDRs, + forcefullyDeletePod: func(p *api.Pod) error { return forcefullyDeletePod(kubeClient, p) }, + nodeExistsInCloudProvider: func(nodeName string) (bool, error) { return nodeExistsInCloudProvider(cloud, nodeName) }, + enterPartialDisruptionFunc: ReducedQPSFunc, + enterFullDisruptionFunc: HealthyQPSFunc, + computeZoneStateFunc: ComputeZoneState, + evictionLimiterQPS: evictionLimiterQPS, + zoneStates: make(map[string]zoneState), } podInformer.AddEventHandler(framework.ResourceEventHandlerFuncs{ @@ -491,7 +496,7 @@ func (nc *NodeController) monitorNodeStatus() error { "Skipping - no pods will be evicted.", node.Name) continue } - // We do not treat a master node as a part of the cluster for network segmentation checking. + // We do not treat a master node as a part of the cluster for network disruption checking. if !system.IsMasterNode(node) { zoneToNodeConditions[utilnode.GetZoneKey(node)] = append(zoneToNodeConditions[utilnode.GetZoneKey(node)], currentReadyCondition) } @@ -550,39 +555,110 @@ func (nc *NodeController) monitorNodeStatus() error { } } } - - for k, v := range zoneToNodeConditions { - newState := nc.computeZoneStateFunc(v) - if newState == nc.zoneStates[k] { - continue - } - if newState == stateFullSegmentation { - glog.V(2).Infof("NodeController is entering network segmentation mode in zone %v.", k) - } else if newState == stateNormal { - glog.V(2).Infof("NodeController exited network segmentation mode in zone %v.", k) - } - for i := range nodes.Items { - if utilnode.GetZoneKey(&nodes.Items[i]) == k { - if newState == stateFullSegmentation { - // When zone is fully segmented we stop the eviction all together. - nc.cancelPodEviction(&nodes.Items[i]) - } - if newState == stateNormal && nc.zoneStates[k] == stateFullSegmentation { - // When exiting segmentation mode update probe timestamps on all Nodes. 
- now := nc.now() - v := nc.nodeStatusMap[nodes.Items[i].Name] - v.probeTimestamp = now - v.readyTransitionTimestamp = now - nc.nodeStatusMap[nodes.Items[i].Name] = v - } - } - nc.zoneStates[k] = newState - } + nc.handleDisruption(zoneToNodeConditions, nodes) return nil } +func (nc *NodeController) handleDisruption(zoneToNodeConditions map[string][]*api.NodeCondition, nodes *api.NodeList) { + newZoneStates := map[string]zoneState{} + allAreFullyDisrupted := true + for k, v := range zoneToNodeConditions { + newState := nc.computeZoneStateFunc(v) + if newState != stateFullDisruption { + allAreFullyDisrupted = false + } + newZoneStates[k] = newState + if _, had := nc.zoneStates[k]; !had { + nc.zoneStates[k] = stateInitial + } + } + + allWasFullyDisrupted := true + for k, v := range nc.zoneStates { + if _, have := zoneToNodeConditions[k]; !have { + delete(nc.zoneStates, k) + continue + } + if v != stateFullDisruption { + allWasFullyDisrupted = false + break + } + } + + // At least one Node was responding in the previous pass or in the current pass. The semantics are as follows: + // - if the new state is "partialDisruption" we call a user-defined function that returns a new limiter to use, + // - if the new state is "normal" we resume normal operation (go back to default limiter settings), + // - if the new state is "fullDisruption" we restore the normal eviction rate, + // - unless all zones in the cluster are in "fullDisruption" - in that case we stop all evictions. + if !allAreFullyDisrupted || !allWasFullyDisrupted { + // We're switching to full disruption mode + if allAreFullyDisrupted { + glog.V(0).Info("NodeController detected that all Nodes are not-Ready. Entering master disruption mode.") + for i := range nodes.Items { + nc.cancelPodEviction(&nodes.Items[i]) + } + // We stop all evictions. + for k := range nc.zonePodEvictor { + nc.zonePodEvictor[k].SwapLimiter(0) + nc.zoneTerminationEvictor[k].SwapLimiter(0) + } + for k := range nc.zoneStates { + nc.zoneStates[k] = stateFullDisruption + } + // All rate limiters are updated, so we can return early here. + return + } + // We're exiting full disruption mode + if allWasFullyDisrupted { + glog.V(0).Info("NodeController detected that some Nodes are Ready. Exiting master disruption mode.") + // When exiting disruption mode, update probe timestamps on all Nodes. + now := nc.now() + for i := range nodes.Items { + v := nc.nodeStatusMap[nodes.Items[i].Name] + v.probeTimestamp = now + v.readyTransitionTimestamp = now + nc.nodeStatusMap[nodes.Items[i].Name] = v + } + // We reset all rate limiters to settings appropriate for the given state. 
+ for k := range nc.zonePodEvictor { + nc.setLimiterInZone(k, len(zoneToNodeConditions[k]), newZoneStates[k]) + nc.zoneStates[k] = newZoneStates[k] + } + return + } + // We know that there's at least one not-fully disrupted so, + // we can use default behavior for rate limiters + for k, v := range nc.zoneStates { + newState := newZoneStates[k] + if v == newState { + continue + } + glog.V(0).Infof("NodeController detected that zone %v is now in state %v.", k, newState) + nc.setLimiterInZone(k, len(zoneToNodeConditions[k]), newState) + nc.zoneStates[k] = newState + } + } +} + +func (nc *NodeController) setLimiterInZone(zone string, zoneSize int, state zoneState) { + switch state { + case stateNormal: + nc.zonePodEvictor[zone].SwapLimiter(nc.evictionLimiterQPS) + nc.zoneTerminationEvictor[zone].SwapLimiter(nc.evictionLimiterQPS) + case statePartialDisruption: + nc.zonePodEvictor[zone].SwapLimiter( + nc.enterPartialDisruptionFunc(zoneSize, nc.evictionLimiterQPS)) + nc.zoneTerminationEvictor[zone].SwapLimiter( + nc.enterPartialDisruptionFunc(zoneSize, nc.evictionLimiterQPS)) + case stateFullDisruption: + nc.zonePodEvictor[zone].SwapLimiter( + nc.enterFullDisruptionFunc(zoneSize, nc.evictionLimiterQPS)) + nc.zoneTerminationEvictor[zone].SwapLimiter( + nc.enterFullDisruptionFunc(zoneSize, nc.evictionLimiterQPS)) + } +} + // For a given node checks its conditions and tries to update it. Returns grace period to which given node // is entitled, state of current and last observed Ready Condition, and an error if it occurred. func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, api.NodeCondition, *api.NodeCondition, error) { @@ -791,16 +867,5 @@ func (nc *NodeController) cancelPodEviction(node *api.Node) bool { func (nc *NodeController) evictPods(node *api.Node) bool { nc.evictorLock.Lock() defer nc.evictorLock.Unlock() - foundHealty := false - for _, state := range nc.zoneStates { - if state != stateFullSegmentation { - foundHealty = true - break - } - } - if !foundHealty { - return false - } - zone := utilnode.GetZoneKey(node) - return nc.zonePodEvictor[zone].Add(node.Name) + return nc.zonePodEvictor[utilnode.GetZoneKey(node)].Add(node.Name) } diff --git a/pkg/controller/node/nodecontroller_test.go b/pkg/controller/node/nodecontroller_test.go index ededa280253..66a0b941c24 100644 --- a/pkg/controller/node/nodecontroller_test.go +++ b/pkg/controller/node/nodecontroller_test.go @@ -35,7 +35,6 @@ const ( testNodeMonitorGracePeriod = 40 * time.Second testNodeStartupGracePeriod = 60 * time.Second testNodeMonitorPeriod = 5 * time.Second - testRateLimiterBurst = 10000 testRateLimiterQPS = float32(10000) ) @@ -458,145 +457,6 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { expectedEvictPods: true, description: "Node created long time ago, node controller posted Unknown for a long period of time.", }, - // NetworkSegmentation: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes. 
- { - fakeNodeHandler: &FakeNodeHandler{ - Existing: []*api.Node{ - { - ObjectMeta: api.ObjectMeta{ - Name: "node0", - CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), - Labels: map[string]string{ - unversioned.LabelZoneRegion: "region1", - unversioned.LabelZoneFailureDomain: "zone1", - }, - }, - Status: api.NodeStatus{ - Conditions: []api.NodeCondition{ - { - Type: api.NodeReady, - Status: api.ConditionUnknown, - LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), - LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), - }, - }, - }, - }, - { - ObjectMeta: api.ObjectMeta{ - Name: "node1", - CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), - Labels: map[string]string{ - unversioned.LabelZoneRegion: "region2", - unversioned.LabelZoneFailureDomain: "zone2", - }, - }, - Status: api.NodeStatus{ - Conditions: []api.NodeCondition{ - { - Type: api.NodeReady, - Status: api.ConditionUnknown, - LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), - LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), - }, - }, - }, - }, - }, - Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}), - }, - daemonSets: nil, - timeToPass: 60 * time.Minute, - newNodeStatus: api.NodeStatus{ - Conditions: []api.NodeCondition{ - { - Type: api.NodeReady, - Status: api.ConditionUnknown, - // Node status was updated by nodecontroller 1hr ago - LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), - LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), - }, - }, - }, - secondNodeNewStatus: api.NodeStatus{ - Conditions: []api.NodeCondition{ - { - Type: api.NodeReady, - Status: api.ConditionUnknown, - // Node status was updated by nodecontroller 1hr ago - LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), - LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), - }, - }, - }, - expectedEvictPods: false, - description: "Network Segmentation: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.", - }, - // NetworkSegmentation: Node created long time ago, node controller posted Unknown for a long period - // of on first Node, eviction should stop even though -master Node is healthy. 
- { - fakeNodeHandler: &FakeNodeHandler{ - Existing: []*api.Node{ - { - ObjectMeta: api.ObjectMeta{ - Name: "node0", - CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), - Labels: map[string]string{ - unversioned.LabelZoneRegion: "region1", - unversioned.LabelZoneFailureDomain: "zone1", - }, - }, - Status: api.NodeStatus{ - Conditions: []api.NodeCondition{ - { - Type: api.NodeReady, - Status: api.ConditionUnknown, - LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), - LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), - }, - }, - }, - }, - { - ObjectMeta: api.ObjectMeta{ - Name: "node-master", - CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), - Labels: map[string]string{ - unversioned.LabelZoneRegion: "region1", - unversioned.LabelZoneFailureDomain: "zone1", - }, - }, - Status: api.NodeStatus{ - Conditions: []api.NodeCondition{ - { - Type: api.NodeReady, - Status: api.ConditionTrue, - LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), - LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), - }, - }, - }, - }, - }, - Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}), - }, - daemonSets: nil, - timeToPass: 60 * time.Minute, - newNodeStatus: api.NodeStatus{ - Conditions: []api.NodeCondition{ - { - Type: api.NodeReady, - Status: api.ConditionUnknown, - // Node status was updated by nodecontroller 1hr ago - LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), - LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), - }, - }, - }, - secondNodeNewStatus: healthyNodeNewStatus, - expectedEvictPods: false, - description: "NetworkSegmentation: Node created long time ago, node controller posted Unknown for a long period of on first Node, eviction should stop even though -master Node is healthy", - }, } for _, item := range table { @@ -647,6 +507,539 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { } } +func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) { + fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC) + evictionTimeout := 10 * time.Minute + timeToPass := 60 * time.Minute + + // Because of the logic that prevents NC from evicting anything when all Nodes are NotReady + // we need second healthy node in tests. Because of how the tests are written we need to update + // the status of this Node. + healthyNodeNewStatus := api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionTrue, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 13, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + } + unhealthyNodeNewStatus := api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + // Node status was updated by nodecontroller 1hr ago + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + } + + table := []struct { + nodeList []*api.Node + podList []api.Pod + updatedNodeStatuses []api.NodeStatus + expectedInitialStates map[string]zoneState + expectedFollowingStates map[string]zoneState + expectedEvictPods bool + description string + }{ + // NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes. 
+ // Only zone is down - eviction shouldn't take place + { + nodeList: []*api.Node{ + { + ObjectMeta: api.ObjectMeta{ + Name: "node0", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + Labels: map[string]string{ + unversioned.LabelZoneRegion: "region1", + unversioned.LabelZoneFailureDomain: "zone1", + }, + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node1", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + Labels: map[string]string{ + unversioned.LabelZoneRegion: "region1", + unversioned.LabelZoneFailureDomain: "zone1", + }, + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + }, + podList: []api.Pod{*newPod("pod0", "node0")}, + updatedNodeStatuses: []api.NodeStatus{ + unhealthyNodeNewStatus, + unhealthyNodeNewStatus, + }, + expectedInitialStates: map[string]zoneState{createZoneID("region1", "zone1"): stateFullDisruption}, + expectedFollowingStates: map[string]zoneState{createZoneID("region1", "zone1"): stateFullDisruption}, + expectedEvictPods: false, + description: "Network Disruption: Only zone is down - eviction shouldn't take place.", + }, + // NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes. + // Both zones down - eviction shouldn't take place + { + nodeList: []*api.Node{ + { + ObjectMeta: api.ObjectMeta{ + Name: "node0", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + Labels: map[string]string{ + unversioned.LabelZoneRegion: "region1", + unversioned.LabelZoneFailureDomain: "zone1", + }, + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node1", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + Labels: map[string]string{ + unversioned.LabelZoneRegion: "region2", + unversioned.LabelZoneFailureDomain: "zone2", + }, + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + }, + + podList: []api.Pod{*newPod("pod0", "node0")}, + updatedNodeStatuses: []api.NodeStatus{ + unhealthyNodeNewStatus, + unhealthyNodeNewStatus, + }, + expectedInitialStates: map[string]zoneState{ + createZoneID("region1", "zone1"): stateFullDisruption, + createZoneID("region2", "zone2"): stateFullDisruption, + }, + expectedFollowingStates: map[string]zoneState{ + createZoneID("region1", "zone1"): stateFullDisruption, + createZoneID("region2", "zone2"): stateFullDisruption, + }, + expectedEvictPods: false, + description: "Network Disruption: Both zones down - eviction shouldn't take place.", + }, + // 
NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes. + // One zone is down - eviction should take place + { + nodeList: []*api.Node{ + { + ObjectMeta: api.ObjectMeta{ + Name: "node0", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + Labels: map[string]string{ + unversioned.LabelZoneRegion: "region1", + unversioned.LabelZoneFailureDomain: "zone1", + }, + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node1", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + Labels: map[string]string{ + unversioned.LabelZoneRegion: "region1", + unversioned.LabelZoneFailureDomain: "zone2", + }, + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionTrue, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + }, + podList: []api.Pod{*newPod("pod0", "node0")}, + updatedNodeStatuses: []api.NodeStatus{ + unhealthyNodeNewStatus, + healthyNodeNewStatus, + }, + expectedInitialStates: map[string]zoneState{ + createZoneID("region1", "zone1"): stateFullDisruption, + createZoneID("region1", "zone2"): stateNormal, + }, + expectedFollowingStates: map[string]zoneState{ + createZoneID("region1", "zone1"): stateFullDisruption, + createZoneID("region1", "zone2"): stateNormal, + }, + expectedEvictPods: true, + description: "Network Disruption: One zone is down - eviction should take place.", + }, + // NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period + // of on first Node, eviction should stop even though -master Node is healthy. 
+ { + nodeList: []*api.Node{ + { + ObjectMeta: api.ObjectMeta{ + Name: "node0", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + Labels: map[string]string{ + unversioned.LabelZoneRegion: "region1", + unversioned.LabelZoneFailureDomain: "zone1", + }, + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node-master", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + Labels: map[string]string{ + unversioned.LabelZoneRegion: "region1", + unversioned.LabelZoneFailureDomain: "zone1", + }, + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionTrue, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + }, + podList: []api.Pod{*newPod("pod0", "node0")}, + updatedNodeStatuses: []api.NodeStatus{ + unhealthyNodeNewStatus, + healthyNodeNewStatus, + }, + expectedInitialStates: map[string]zoneState{ + createZoneID("region1", "zone1"): stateFullDisruption, + }, + expectedFollowingStates: map[string]zoneState{ + createZoneID("region1", "zone1"): stateFullDisruption, + }, + expectedEvictPods: false, + description: "NetworkDisruption: eviction should stop, only -master Node is healthy", + }, + // NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes. + // Initially both zones down, one comes back - eviction should take place + { + nodeList: []*api.Node{ + { + ObjectMeta: api.ObjectMeta{ + Name: "node0", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + Labels: map[string]string{ + unversioned.LabelZoneRegion: "region1", + unversioned.LabelZoneFailureDomain: "zone1", + }, + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node1", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + Labels: map[string]string{ + unversioned.LabelZoneRegion: "region1", + unversioned.LabelZoneFailureDomain: "zone2", + }, + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + }, + + podList: []api.Pod{*newPod("pod0", "node0")}, + updatedNodeStatuses: []api.NodeStatus{ + unhealthyNodeNewStatus, + healthyNodeNewStatus, + }, + expectedInitialStates: map[string]zoneState{ + createZoneID("region1", "zone1"): stateFullDisruption, + createZoneID("region1", "zone2"): stateFullDisruption, + }, + expectedFollowingStates: map[string]zoneState{ + createZoneID("region1", "zone1"): stateFullDisruption, + createZoneID("region1", "zone2"): stateNormal, + }, + expectedEvictPods: true, + description: "Initially both zones down, one comes back - eviction should take place", + }, + // NetworkDisruption: Node created long 
time ago, node controller posted Unknown for a long period of time on both Nodes. + // Zone is partially disrupted - eviction should take place + { + nodeList: []*api.Node{ + { + ObjectMeta: api.ObjectMeta{ + Name: "node0", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + Labels: map[string]string{ + unversioned.LabelZoneRegion: "region1", + unversioned.LabelZoneFailureDomain: "zone1", + }, + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node1", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + Labels: map[string]string{ + unversioned.LabelZoneRegion: "region1", + unversioned.LabelZoneFailureDomain: "zone1", + }, + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node2", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + Labels: map[string]string{ + unversioned.LabelZoneRegion: "region1", + unversioned.LabelZoneFailureDomain: "zone1", + }, + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionUnknown, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node3", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + Labels: map[string]string{ + unversioned.LabelZoneRegion: "region1", + unversioned.LabelZoneFailureDomain: "zone1", + }, + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionTrue, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + { + ObjectMeta: api.ObjectMeta{ + Name: "node4", + CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + Labels: map[string]string{ + unversioned.LabelZoneRegion: "region1", + unversioned.LabelZoneFailureDomain: "zone1", + }, + }, + Status: api.NodeStatus{ + Conditions: []api.NodeCondition{ + { + Type: api.NodeReady, + Status: api.ConditionTrue, + LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC), + }, + }, + }, + }, + }, + + podList: []api.Pod{*newPod("pod0", "node0")}, + updatedNodeStatuses: []api.NodeStatus{ + unhealthyNodeNewStatus, + unhealthyNodeNewStatus, + unhealthyNodeNewStatus, + healthyNodeNewStatus, + healthyNodeNewStatus, + }, + expectedInitialStates: map[string]zoneState{ + createZoneID("region1", "zone1"): statePartialDisruption, + }, + expectedFollowingStates: map[string]zoneState{ + createZoneID("region1", "zone1"): statePartialDisruption, + }, + expectedEvictPods: true, + description: "Zone is partially disrupted - eviction should take place.", + }, + } + + for _, item := range table { + fakeNodeHandler := &FakeNodeHandler{ + Existing: item.nodeList, + Clientset: 
fake.NewSimpleClientset(&api.PodList{Items: item.podList}), + } + nodeController, _ := NewNodeControllerFromClient(nil, fakeNodeHandler, + evictionTimeout, testRateLimiterQPS, testNodeMonitorGracePeriod, + testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false) + nodeController.now = func() unversioned.Time { return fakeNow } + nodeController.enterPartialDisruptionFunc = func(nodeNum int, defaultQPS float32) float32 { + return testRateLimiterQPS + } + nodeController.enterFullDisruptionFunc = func(nodeNum int, defaultQPS float32) float32 { + return testRateLimiterQPS + } + if err := nodeController.monitorNodeStatus(); err != nil { + t.Errorf("%v: unexpected error: %v", item.description, err) + } + + for zone, state := range item.expectedInitialStates { + if state != nodeController.zoneStates[zone] { + t.Errorf("%v: Unexpected zone state: %v: %v instead %v", item.description, zone, nodeController.zoneStates[zone], state) + } + } + + nodeController.now = func() unversioned.Time { return unversioned.Time{Time: fakeNow.Add(timeToPass)} } + for i := range item.updatedNodeStatuses { + fakeNodeHandler.Existing[i].Status = item.updatedNodeStatuses[i] + } + + if err := nodeController.monitorNodeStatus(); err != nil { + t.Errorf("%v: unexpected error: %v", item.description, err) + } + // Give some time for rate-limiter to reload + time.Sleep(50 * time.Millisecond) + + for zone, state := range item.expectedFollowingStates { + if state != nodeController.zoneStates[zone] { + t.Errorf("%v: Unexpected zone state: %v: %v instead %v", item.description, zone, nodeController.zoneStates[zone], state) + } + } + zones := getZones(fakeNodeHandler) + for _, zone := range zones { + nodeController.zonePodEvictor[zone].Try(func(value TimedValue) (bool, time.Duration) { + remaining, _ := deletePods(fakeNodeHandler, nodeController.recorder, value.Value, nodeController.daemonSetStore) + if remaining { + nodeController.zoneTerminationEvictor[zone].Add(value.Value) + } + return true, 0 + }) + nodeController.zonePodEvictor[zone].Try(func(value TimedValue) (bool, time.Duration) { + terminatePods(fakeNodeHandler, nodeController.recorder, value.Value, value.AddedAt, nodeController.maximumGracePeriod) + return true, 0 + }) + } + + podEvicted := false + for _, action := range fakeNodeHandler.Actions() { + if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" { + podEvicted = true + break + } + } + + if item.expectedEvictPods != podEvicted { + t.Errorf("%v: expected pod eviction: %+v, got %+v", item.description, item.expectedEvictPods, podEvicted) + } + } +} + // TestCloudProviderNoRateLimit tests that monitorNodes() immediately deletes // pods and the node when kubelet has not reported, and the cloudprovider says // the node is gone. diff --git a/pkg/controller/node/rate_limited_queue.go b/pkg/controller/node/rate_limited_queue.go index 305ba129fe6..ea6ae8df5e1 100644 --- a/pkg/controller/node/rate_limited_queue.go +++ b/pkg/controller/node/rate_limited_queue.go @@ -21,9 +21,10 @@ import ( "sync" "time" - "github.com/golang/glog" "k8s.io/kubernetes/pkg/util/flowcontrol" "k8s.io/kubernetes/pkg/util/sets" + + "github.com/golang/glog" ) // TimedValue is a value that should be processed at a designated time. 
@@ -179,7 +180,7 @@ func (q *RateLimitedTimedQueue) Try(fn ActionFunc) { for ok { // rate limit the queue checking if !q.limiter.TryAccept() { - glog.V(10).Info("Try rate limited...") + glog.V(10).Infof("Try rate limited for value: %v", val) // Try again later break } diff --git a/pkg/controller/node/test_utils.go b/pkg/controller/node/test_utils.go index cd55e19163a..4ccd4e3f827 100644 --- a/pkg/controller/node/test_utils.go +++ b/pkg/controller/node/test_utils.go @@ -247,3 +247,7 @@ func getZones(nodeHandler *FakeNodeHandler) []string { } return zones.List() } + +func createZoneID(region, zone string) string { + return region + ":\x00:" + zone +}
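Reviewer illustration: the policy introduced by this patch boils down to two rules - ComputeZoneState classifies a zone from its Ready conditions (FullDisruption when no Node is Ready, PartialDisruption when at least 3 and more than 1/3 of Nodes are NotReady, Normal otherwise), and ReducedQPSFunc decides how fast evictions may continue once a zone is partially disrupted (1/10 of the default QPS for clusters above LargeClusterThreshold, 0 otherwise). The standalone sketch below mirrors those rules outside the controller; the helper names (classifyZone, reducedQPS) and the example ready/notReady counts are purely illustrative, not part of the patch.

```go
package main

import "fmt"

// Mirrored from the patch: clusters larger than this keep a reduced eviction rate.
const largeClusterThreshold = 20

// classifyZone restates the ComputeZoneState thresholds from the patch.
func classifyZone(ready, notReady int) string {
	switch {
	case ready == 0 && notReady > 0:
		return "FullDisruption"
	case notReady > 2 && 2*notReady > ready:
		// At least 3 Nodes, and more than 1/3 of the zone, are NotReady.
		return "PartialDisruption"
	default:
		return "Normal"
	}
}

// reducedQPS restates ReducedQPSFunc: large clusters evict at 1/10 of the
// default rate, small clusters stop evictions entirely.
func reducedQPS(nodeNum int, defaultQPS float32) float32 {
	if nodeNum > largeClusterThreshold {
		return defaultQPS / 10
	}
	return 0
}

func main() {
	// Hypothetical zone sizes, chosen only to exercise the thresholds.
	examples := []struct{ ready, notReady int }{
		{10, 0}, // Normal
		{10, 2}, // Normal: fewer than 3 NotReady Nodes
		{6, 4},  // PartialDisruption: 4/10 NotReady is more than 1/3
		{0, 5},  // FullDisruption: no Ready Nodes
	}
	for _, e := range examples {
		state := classifyZone(e.ready, e.notReady)
		fmt.Printf("ready=%d notReady=%d -> %s", e.ready, e.notReady, state)
		if state == "PartialDisruption" {
			fmt.Printf(" (eviction QPS %v)", reducedQPS(e.ready+e.notReady, 10))
		}
		fmt.Println()
	}
}
```

Note that the partial-disruption example prints an eviction QPS of 0: a 10-Node zone is at or below the threshold, so under this patch a small, partially disrupted cluster stops evicting rather than merely slowing down.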