Change eviction logic in NodeController and make it Zone-aware

gmarek 2016-07-13 16:57:22 +02:00
parent d34428a6f4
commit 66224ce0bd
5 changed files with 707 additions and 216 deletions

View File

@@ -35,18 +35,34 @@ import (
"github.com/golang/glog"
)
const (
// Number of Nodes that need to be in the cluster for it to be treated as "large"
LargeClusterThreshold = 20
)
// This function is expected to get a slice of NodeReadyConditions for all Nodes in a given zone.
// The zone is considered:
// - fullyDisrupted if there are no Ready Nodes,
// - partiallyDisrupted if more than 1/3 of Nodes (at least 3) are not Ready,
// - normal otherwise
func ComputeZoneState(nodeReadyConditions []*api.NodeCondition) zoneState {
- seenReady := false
+ readyNodes := 0
+ notReadyNodes := 0
for i := range nodeReadyConditions {
if nodeReadyConditions[i] != nil && nodeReadyConditions[i].Status == api.ConditionTrue {
- seenReady = true
+ readyNodes++
+ } else {
+ notReadyNodes++
}
}
- if seenReady {
- return stateNormal
- }
- return stateFullSegmentation
+ switch {
+ case readyNodes == 0 && notReadyNodes > 0:
+ return stateFullDisruption
+ case notReadyNodes > 2 && 2*notReadyNodes > readyNodes:
+ return statePartialDisruption
+ default:
+ return stateNormal
+ }
}
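Annotation - a hedged sketch, not part of the commit: the helper below fabricates Ready conditions purely to make the thresholds above concrete (newConditions is an invented name, used only for illustration).

// newConditions builds a slice of NodeReady conditions for a hypothetical zone.
func newConditions(ready, notReady int) []*api.NodeCondition {
	conds := make([]*api.NodeCondition, 0, ready+notReady)
	for i := 0; i < ready; i++ {
		conds = append(conds, &api.NodeCondition{Type: api.NodeReady, Status: api.ConditionTrue})
	}
	for i := 0; i < notReady; i++ {
		conds = append(conds, &api.NodeCondition{Type: api.NodeReady, Status: api.ConditionUnknown})
	}
	return conds
}

// ComputeZoneState(newConditions(0, 3)) == stateFullDisruption    // no Ready Nodes at all
// ComputeZoneState(newConditions(4, 2)) == stateNormal            // only 2 NotReady, below the 3-Node floor
// ComputeZoneState(newConditions(5, 3)) == statePartialDisruption // 3 NotReady and 3/8 > 1/3
// ComputeZoneState(newConditions(7, 3)) == stateNormal            // 3/10 < 1/3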
// cleanupOrphanedPods deletes pods that are bound to nodes that don't
@@ -320,3 +336,15 @@ func terminatePods(kubeClient clientset.Interface, recorder record.EventRecorder
}
return complete, nextAttempt, nil
}
func HealthyQPSFunc(nodeNum int, defaultQPS float32) float32 {
return defaultQPS
}
// If the cluster is large, make evictions slower; if it's small, stop evictions altogether.
func ReducedQPSFunc(nodeNum int, defaultQPS float32) float32 {
if nodeNum > LargeClusterThreshold {
return defaultQPS / 10
}
return 0
}
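Annotation - for intuition, a sketch of how the two functions behave around LargeClusterThreshold. The 0.1 default QPS is an assumed value for illustration only (not taken from this commit), and fmt plus the functions above are presumed to be in scope.

func printEffectiveQPS() {
	const assumedDefaultQPS = float32(0.1) // illustrative default, not from the diff
	for _, nodeNum := range []int{10, 50} {
		fmt.Printf("nodes=%d full-disruption=%v partial-disruption=%v\n",
			nodeNum,
			HealthyQPSFunc(nodeNum, assumedDefaultQPS), // always the default rate
			ReducedQPSFunc(nodeNum, assumedDefaultQPS)) // 0 for small clusters, default/10 for large
	}
	// Output:
	// nodes=10 full-disruption=0.1 partial-disruption=0
	// nodes=50 full-disruption=0.1 partial-disruption=0.01
}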

View File

@@ -67,9 +67,10 @@ const (
type zoneState string
const (
- stateNormal = zoneState("Normal")
- stateFullSegmentation = zoneState("FullSegmentation")
- statePartialSegmentation = zoneState("PartialSegmentation")
+ stateInitial = zoneState("Initial")
+ stateNormal = zoneState("Normal")
+ stateFullDisruption = zoneState("FullDisruption")
+ statePartialDisruption = zoneState("PartialDisruption")
)
type nodeStatusData struct {
@@ -136,9 +137,11 @@ type NodeController struct {
// allocate/recycle CIDRs for node if allocateNodeCIDRs == true
cidrAllocator CIDRAllocator
- forcefullyDeletePod func(*api.Pod) error
- nodeExistsInCloudProvider func(string) (bool, error)
- computeZoneStateFunc func(nodeConditions []*api.NodeCondition) zoneState
+ forcefullyDeletePod func(*api.Pod) error
+ nodeExistsInCloudProvider func(string) (bool, error)
+ computeZoneStateFunc func(nodeConditions []*api.NodeCondition) zoneState
+ enterPartialDisruptionFunc func(nodeNum int, defaultQPS float32) float32
+ enterFullDisruptionFunc func(nodeNum int, defaultQPS float32) float32
zoneStates map[string]zoneState
@@ -192,28 +195,30 @@ func NewNodeController(
}
nc := &NodeController{
- cloud: cloud,
- knownNodeSet: make(map[string]*api.Node),
- kubeClient: kubeClient,
- recorder: recorder,
- podEvictionTimeout: podEvictionTimeout,
- maximumGracePeriod: 5 * time.Minute,
- zonePodEvictor: make(map[string]*RateLimitedTimedQueue),
- zoneTerminationEvictor: make(map[string]*RateLimitedTimedQueue),
- nodeStatusMap: make(map[string]nodeStatusData),
- nodeMonitorGracePeriod: nodeMonitorGracePeriod,
- nodeMonitorPeriod: nodeMonitorPeriod,
- nodeStartupGracePeriod: nodeStartupGracePeriod,
- lookupIP: net.LookupIP,
- now: unversioned.Now,
- clusterCIDR: clusterCIDR,
- serviceCIDR: serviceCIDR,
- allocateNodeCIDRs: allocateNodeCIDRs,
- forcefullyDeletePod: func(p *api.Pod) error { return forcefullyDeletePod(kubeClient, p) },
- nodeExistsInCloudProvider: func(nodeName string) (bool, error) { return nodeExistsInCloudProvider(cloud, nodeName) },
- computeZoneStateFunc: ComputeZoneState,
- evictionLimiterQPS: evictionLimiterQPS,
- zoneStates: make(map[string]zoneState),
+ cloud: cloud,
+ knownNodeSet: make(map[string]*api.Node),
+ kubeClient: kubeClient,
+ recorder: recorder,
+ podEvictionTimeout: podEvictionTimeout,
+ maximumGracePeriod: 5 * time.Minute,
+ zonePodEvictor: make(map[string]*RateLimitedTimedQueue),
+ zoneTerminationEvictor: make(map[string]*RateLimitedTimedQueue),
+ nodeStatusMap: make(map[string]nodeStatusData),
+ nodeMonitorGracePeriod: nodeMonitorGracePeriod,
+ nodeMonitorPeriod: nodeMonitorPeriod,
+ nodeStartupGracePeriod: nodeStartupGracePeriod,
+ lookupIP: net.LookupIP,
+ now: unversioned.Now,
+ clusterCIDR: clusterCIDR,
+ serviceCIDR: serviceCIDR,
+ allocateNodeCIDRs: allocateNodeCIDRs,
+ forcefullyDeletePod: func(p *api.Pod) error { return forcefullyDeletePod(kubeClient, p) },
+ nodeExistsInCloudProvider: func(nodeName string) (bool, error) { return nodeExistsInCloudProvider(cloud, nodeName) },
+ enterPartialDisruptionFunc: ReducedQPSFunc,
+ enterFullDisruptionFunc: HealthyQPSFunc,
+ computeZoneStateFunc: ComputeZoneState,
+ evictionLimiterQPS: evictionLimiterQPS,
+ zoneStates: make(map[string]zoneState),
}
podInformer.AddEventHandler(framework.ResourceEventHandlerFuncs{
@@ -491,7 +496,7 @@ func (nc *NodeController) monitorNodeStatus() error {
"Skipping - no pods will be evicted.", node.Name)
continue
}
- // We do not treat a master node as a part of the cluster for network segmentation checking.
+ // We do not treat a master node as a part of the cluster for network disruption checking.
if !system.IsMasterNode(node) {
zoneToNodeConditions[utilnode.GetZoneKey(node)] = append(zoneToNodeConditions[utilnode.GetZoneKey(node)], currentReadyCondition)
}
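// Annotation (not part of the diff): system.IsMasterNode is believed to identify
// the master by the Node's name (a "master" suffix at this point in the codebase),
// which is why the tests below model a healthy master as a Node named "node-master".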
@@ -550,39 +555,110 @@
}
}
}
- for k, v := range zoneToNodeConditions {
- newState := nc.computeZoneStateFunc(v)
- if newState == nc.zoneStates[k] {
- continue
- }
- if newState == stateFullSegmentation {
- glog.V(2).Infof("NodeController is entering network segmentation mode in zone %v.", k)
- } else if newState == stateNormal {
- glog.V(2).Infof("NodeController exited network segmentation mode in zone %v.", k)
- }
- for i := range nodes.Items {
- if utilnode.GetZoneKey(&nodes.Items[i]) == k {
- if newState == stateFullSegmentation {
- // When zone is fully segmented we stop the eviction all together.
- nc.cancelPodEviction(&nodes.Items[i])
- }
- if newState == stateNormal && nc.zoneStates[k] == stateFullSegmentation {
- // When exiting segmentation mode update probe timestamps on all Nodes.
- now := nc.now()
- v := nc.nodeStatusMap[nodes.Items[i].Name]
- v.probeTimestamp = now
- v.readyTransitionTimestamp = now
- nc.nodeStatusMap[nodes.Items[i].Name] = v
- }
- }
- }
- nc.zoneStates[k] = newState
- }
+ nc.handleDisruption(zoneToNodeConditions, nodes)
return nil
}
func (nc *NodeController) handleDisruption(zoneToNodeConditions map[string][]*api.NodeCondition, nodes *api.NodeList) {
newZoneStates := map[string]zoneState{}
allAreFullyDisrupted := true
for k, v := range zoneToNodeConditions {
newState := nc.computeZoneStateFunc(v)
if newState != stateFullDisruption {
allAreFullyDisrupted = false
}
newZoneStates[k] = newState
if _, had := nc.zoneStates[k]; !had {
nc.zoneStates[k] = stateInitial
}
}
allWasFullyDisrupted := true
for k, v := range nc.zoneStates {
if _, have := zoneToNodeConditions[k]; !have {
delete(nc.zoneStates, k)
continue
}
if v != stateFullDisruption {
allWasFullyDisrupted = false
break
}
}
// At least one Node was responding in the previous pass or in the current pass. The semantics are as follows:
// - if the new state is "partialDisruption", we call a user-defined function that returns a new limiter to use,
// - if the new state is "normal", we resume normal operation (go back to the default limiter settings),
// - if the new state is "fullDisruption", we restore the normal eviction rate,
//   unless all zones in the cluster are in "fullDisruption" - in that case we stop all evictions.
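// Annotation (summary, not in the original diff):
//   was full / is full -> the enclosing condition is false: nothing changes,
//   was not  / is full -> cancel pending evictions and SwapLimiter(0) in every zone,
//   was full / is not  -> refresh probe timestamps, then set limiters per zone state,
//   was not  / is not  -> setLimiterInZone only for zones whose state changed.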
if !allAreFullyDisrupted || !allWasFullyDisrupted {
// We're switching to full disruption mode
if allAreFullyDisrupted {
glog.V(0).Info("NodeController detected that all Nodes are not-Ready. Entering master disruption mode.")
for i := range nodes.Items {
nc.cancelPodEviction(&nodes.Items[i])
}
// We stop all evictions.
for k := range nc.zonePodEvictor {
nc.zonePodEvictor[k].SwapLimiter(0)
nc.zoneTerminationEvictor[k].SwapLimiter(0)
}
for k := range nc.zoneStates {
nc.zoneStates[k] = stateFullDisruption
}
// All rate limiters are updated, so we can return early here.
return
}
// We're exiting full disruption mode
if allWasFullyDisrupted {
glog.V(0).Info("NodeController detected that some Nodes are Ready. Exiting master disruption mode.")
// When exiting disruption mode update probe timestamps on all Nodes.
now := nc.now()
for i := range nodes.Items {
v := nc.nodeStatusMap[nodes.Items[i].Name]
v.probeTimestamp = now
v.readyTransitionTimestamp = now
nc.nodeStatusMap[nodes.Items[i].Name] = v
}
// We reset all rate limiters to settings appropriate for the given state.
for k := range nc.zonePodEvictor {
nc.setLimiterInZone(k, len(zoneToNodeConditions[k]), newZoneStates[k])
nc.zoneStates[k] = newZoneStates[k]
}
return
}
// We know that there's at least one zone that is not fully disrupted,
// so we can use the default behavior for rate limiters.
for k, v := range nc.zoneStates {
newState := newZoneStates[k]
if v == newState {
continue
}
glog.V(0).Infof("NodeController detected that zone %v is now in state %v.", k, newState)
nc.setLimiterInZone(k, len(zoneToNodeConditions[k]), newState)
nc.zoneStates[k] = newState
}
}
}
func (nc *NodeController) setLimiterInZone(zone string, zoneSize int, state zoneState) {
switch state {
case stateNormal:
nc.zonePodEvictor[zone].SwapLimiter(nc.evictionLimiterQPS)
nc.zoneTerminationEvictor[zone].SwapLimiter(nc.evictionLimiterQPS)
case statePartialDisruption:
nc.zonePodEvictor[zone].SwapLimiter(
nc.enterPartialDisruptionFunc(zoneSize, nc.evictionLimiterQPS))
nc.zoneTerminationEvictor[zone].SwapLimiter(
nc.enterPartialDisruptionFunc(zoneSize, nc.evictionLimiterQPS))
case stateFullDisruption:
nc.zonePodEvictor[zone].SwapLimiter(
nc.enterFullDisruptionFunc(zoneSize, nc.evictionLimiterQPS))
nc.zoneTerminationEvictor[zone].SwapLimiter(
nc.enterFullDisruptionFunc(zoneSize, nc.evictionLimiterQPS))
}
}
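Annotation - setLimiterInZone depends on SwapLimiter, which this commit adds to RateLimitedTimedQueue (see the queue file below). A plausible shape of that method, shown only as a hedged sketch: the limiterLock field name and the burst value of 1 are assumptions for illustration, not taken from the diff.

// Sketch of SwapLimiter: replace the queue's token-bucket limiter in place.
// A QPS of 0 maps to a limiter that never admits, stopping evictions entirely.
func (q *RateLimitedTimedQueue) SwapLimiter(newQPS float32) {
	q.limiterLock.Lock() // assumed lock guarding q.limiter
	defer q.limiterLock.Unlock()
	var newLimiter flowcontrol.RateLimiter
	if newQPS <= 0 {
		newLimiter = flowcontrol.NewFakeNeverRateLimiter()
	} else {
		newLimiter = flowcontrol.NewTokenBucketRateLimiter(newQPS, 1) // burst of 1 assumed
	}
	q.limiter = newLimiter
}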
// For a given node, checks its conditions and tries to update it. Returns the grace period to which the node
// is entitled, the states of the current and last observed Ready conditions, and an error if one occurred.
func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, api.NodeCondition, *api.NodeCondition, error) {
@@ -791,16 +867,5 @@ func (nc *NodeController) cancelPodEviction(node *api.Node) bool {
func (nc *NodeController) evictPods(node *api.Node) bool {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
- foundHealty := false
- for _, state := range nc.zoneStates {
- if state != stateFullSegmentation {
- foundHealty = true
- break
- }
- }
- if !foundHealty {
- return false
- }
- zone := utilnode.GetZoneKey(node)
- return nc.zonePodEvictor[zone].Add(node.Name)
+ return nc.zonePodEvictor[utilnode.GetZoneKey(node)].Add(node.Name)
}

View File

@@ -35,7 +35,6 @@ const (
testNodeMonitorGracePeriod = 40 * time.Second
testNodeStartupGracePeriod = 60 * time.Second
testNodeMonitorPeriod = 5 * time.Second
- testRateLimiterBurst = 10000
testRateLimiterQPS = float32(10000)
)
@@ -458,145 +457,6 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
expectedEvictPods: true,
description: "Node created long time ago, node controller posted Unknown for a long period of time.",
},
// NetworkSegmentation: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone1",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node1",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region2",
unversioned.LabelZoneFailureDomain: "zone2",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
daemonSets: nil,
timeToPass: 60 * time.Minute,
newNodeStatus: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
// Node status was updated by nodecontroller 1hr ago
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
secondNodeNewStatus: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
// Node status was updated by nodecontroller 1hr ago
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
expectedEvictPods: false,
description: "Network Segmentation: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.",
},
// NetworkSegmentation: Node created long time ago, node controller posted Unknown for a long period
// of time on first Node, eviction should stop even though -master Node is healthy.
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone1",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node-master",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone1",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
daemonSets: nil,
timeToPass: 60 * time.Minute,
newNodeStatus: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
// Node status was updated by nodecontroller 1hr ago
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
secondNodeNewStatus: healthyNodeNewStatus,
expectedEvictPods: false,
description: "NetworkSegmentation: Node created long time ago, node controller posted Unknown for a long period of on first Node, eviction should stop even though -master Node is healthy",
},
}
for _, item := range table {
@@ -647,6 +507,539 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
}
}
func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) {
fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
evictionTimeout := 10 * time.Minute
timeToPass := 60 * time.Minute
// Because of the logic that prevents the NodeController from evicting anything when all Nodes are NotReady,
// we need a second healthy Node in these tests. Because of how the tests are written, we also need to
// update the status of this Node.
healthyNodeNewStatus := api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 13, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
}
unhealthyNodeNewStatus := api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
// Node status was updated by nodecontroller 1hr ago
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
}
table := []struct {
nodeList []*api.Node
podList []api.Pod
updatedNodeStatuses []api.NodeStatus
expectedInitialStates map[string]zoneState
expectedFollowingStates map[string]zoneState
expectedEvictPods bool
description string
}{
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
// The only zone in the cluster is down - eviction shouldn't take place
{
nodeList: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone1",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node1",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone1",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
podList: []api.Pod{*newPod("pod0", "node0")},
updatedNodeStatuses: []api.NodeStatus{
unhealthyNodeNewStatus,
unhealthyNodeNewStatus,
},
expectedInitialStates: map[string]zoneState{createZoneID("region1", "zone1"): stateFullDisruption},
expectedFollowingStates: map[string]zoneState{createZoneID("region1", "zone1"): stateFullDisruption},
expectedEvictPods: false,
description: "Network Disruption: Only zone is down - eviction shouldn't take place.",
},
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
// Both zones down - eviction shouldn't take place
{
nodeList: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone1",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node1",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region2",
unversioned.LabelZoneFailureDomain: "zone2",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
podList: []api.Pod{*newPod("pod0", "node0")},
updatedNodeStatuses: []api.NodeStatus{
unhealthyNodeNewStatus,
unhealthyNodeNewStatus,
},
expectedInitialStates: map[string]zoneState{
createZoneID("region1", "zone1"): stateFullDisruption,
createZoneID("region2", "zone2"): stateFullDisruption,
},
expectedFollowingStates: map[string]zoneState{
createZoneID("region1", "zone1"): stateFullDisruption,
createZoneID("region2", "zone2"): stateFullDisruption,
},
expectedEvictPods: false,
description: "Network Disruption: Both zones down - eviction shouldn't take place.",
},
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
// One zone is down - eviction should take place
{
nodeList: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone1",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node1",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone2",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
podList: []api.Pod{*newPod("pod0", "node0")},
updatedNodeStatuses: []api.NodeStatus{
unhealthyNodeNewStatus,
healthyNodeNewStatus,
},
expectedInitialStates: map[string]zoneState{
createZoneID("region1", "zone1"): stateFullDisruption,
createZoneID("region1", "zone2"): stateNormal,
},
expectedFollowingStates: map[string]zoneState{
createZoneID("region1", "zone1"): stateFullDisruption,
createZoneID("region1", "zone2"): stateNormal,
},
expectedEvictPods: true,
description: "Network Disruption: One zone is down - eviction should take place.",
},
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period
// of time on first Node, eviction should stop even though -master Node is healthy.
{
nodeList: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone1",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node-master",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone1",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
podList: []api.Pod{*newPod("pod0", "node0")},
updatedNodeStatuses: []api.NodeStatus{
unhealthyNodeNewStatus,
healthyNodeNewStatus,
},
expectedInitialStates: map[string]zoneState{
createZoneID("region1", "zone1"): stateFullDisruption,
},
expectedFollowingStates: map[string]zoneState{
createZoneID("region1", "zone1"): stateFullDisruption,
},
expectedEvictPods: false,
description: "NetworkDisruption: eviction should stop, only -master Node is healthy",
},
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
// Initially both zones down, one comes back - eviction should take place
{
nodeList: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone1",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node1",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone2",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
podList: []api.Pod{*newPod("pod0", "node0")},
updatedNodeStatuses: []api.NodeStatus{
unhealthyNodeNewStatus,
healthyNodeNewStatus,
},
expectedInitialStates: map[string]zoneState{
createZoneID("region1", "zone1"): stateFullDisruption,
createZoneID("region1", "zone2"): stateFullDisruption,
},
expectedFollowingStates: map[string]zoneState{
createZoneID("region1", "zone1"): stateFullDisruption,
createZoneID("region1", "zone2"): stateNormal,
},
expectedEvictPods: true,
description: "Initially both zones down, one comes back - eviction should take place",
},
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
// Zone is partially disrupted - eviction should take place
{
nodeList: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone1",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node1",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone1",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node2",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone1",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node3",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone1",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: api.ObjectMeta{
Name: "node4",
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
unversioned.LabelZoneRegion: "region1",
unversioned.LabelZoneFailureDomain: "zone1",
},
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
podList: []api.Pod{*newPod("pod0", "node0")},
updatedNodeStatuses: []api.NodeStatus{
unhealthyNodeNewStatus,
unhealthyNodeNewStatus,
unhealthyNodeNewStatus,
healthyNodeNewStatus,
healthyNodeNewStatus,
},
expectedInitialStates: map[string]zoneState{
createZoneID("region1", "zone1"): statePartialDisruption,
},
expectedFollowingStates: map[string]zoneState{
createZoneID("region1", "zone1"): statePartialDisruption,
},
expectedEvictPods: true,
description: "Zone is partially disrupted - eviction should take place.",
},
}
for _, item := range table {
fakeNodeHandler := &FakeNodeHandler{
Existing: item.nodeList,
Clientset: fake.NewSimpleClientset(&api.PodList{Items: item.podList}),
}
nodeController, _ := NewNodeControllerFromClient(nil, fakeNodeHandler,
evictionTimeout, testRateLimiterQPS, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
nodeController.now = func() unversioned.Time { return fakeNow }
nodeController.enterPartialDisruptionFunc = func(nodeNum int, defaultQPS float32) float32 {
return testRateLimiterQPS
}
nodeController.enterFullDisruptionFunc = func(nodeNum int, defaultQPS float32) float32 {
return testRateLimiterQPS
}
if err := nodeController.monitorNodeStatus(); err != nil {
t.Errorf("%v: unexpected error: %v", item.description, err)
}
for zone, state := range item.expectedInitialStates {
if state != nodeController.zoneStates[zone] {
t.Errorf("%v: Unexpected zone state: %v: %v instead %v", item.description, zone, nodeController.zoneStates[zone], state)
}
}
nodeController.now = func() unversioned.Time { return unversioned.Time{Time: fakeNow.Add(timeToPass)} }
for i := range item.updatedNodeStatuses {
fakeNodeHandler.Existing[i].Status = item.updatedNodeStatuses[i]
}
if err := nodeController.monitorNodeStatus(); err != nil {
t.Errorf("%v: unexpected error: %v", item.description, err)
}
// Give some time for rate-limiter to reload
time.Sleep(50 * time.Millisecond)
for zone, state := range item.expectedFollowingStates {
if state != nodeController.zoneStates[zone] {
t.Errorf("%v: Unexpected zone state: %v: %v instead %v", item.description, zone, nodeController.zoneStates[zone], state)
}
}
zones := getZones(fakeNodeHandler)
for _, zone := range zones {
nodeController.zonePodEvictor[zone].Try(func(value TimedValue) (bool, time.Duration) {
remaining, _ := deletePods(fakeNodeHandler, nodeController.recorder, value.Value, nodeController.daemonSetStore)
if remaining {
nodeController.zoneTerminationEvictor[zone].Add(value.Value)
}
return true, 0
})
nodeController.zonePodEvictor[zone].Try(func(value TimedValue) (bool, time.Duration) {
terminatePods(fakeNodeHandler, nodeController.recorder, value.Value, value.AddedAt, nodeController.maximumGracePeriod)
return true, 0
})
}
podEvicted := false
for _, action := range fakeNodeHandler.Actions() {
if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
podEvicted = true
break
}
}
if item.expectedEvictPods != podEvicted {
t.Errorf("%v: expected pod eviction: %+v, got %+v", item.description, item.expectedEvictPods, podEvicted)
}
}
}
// TestCloudProviderNoRateLimit tests that monitorNodeStatus() immediately deletes
// pods and the node when kubelet has not reported, and the cloudprovider says
// the node is gone.

View File

@@ -21,9 +21,10 @@ import (
"sync"
"time"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/util/flowcontrol"
"k8s.io/kubernetes/pkg/util/sets"
"github.com/golang/glog"
)
// TimedValue is a value that should be processed at a designated time.
@@ -179,7 +180,7 @@ func (q *RateLimitedTimedQueue) Try(fn ActionFunc) {
for ok {
// rate limit the queue checking
if !q.limiter.TryAccept() {
glog.V(10).Info("Try rate limited...")
glog.V(10).Infof("Try rate limited for value: %v", val)
// Try again later
break
}

View File

@@ -247,3 +247,7 @@ func getZones(nodeHandler *FakeNodeHandler) []string {
}
return zones.List()
}
func createZoneID(region, zone string) string {
return region + ":\x00:" + zone
}