mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-27 13:37:30 +00:00
Merge pull request #28897 from gmarek/hooks2
Automatic merge from submit-queue Change eviction logic in NodeController and make it Zone-aware Ref. #28832 This PR changes the behavior of the NodeController. From now on ```release-note Change eviction policies in NodeController: - add a "partialDisruption" mode, when more than 33% of Nodes in the zone are not Ready - add "fullDisruption" mode, when all Nodes in the zone are not Ready Eviction behavior depends on the mode in which NodeController is operating: - if the new state is "partialDisruption" or "fullDisruption" we call a user defined function that returns a new QPS to use (default 1/10 of the default rate, and the default rate respectively), - if the new state is "normal" we resume normal operation (go back to default limiter settings), - if all zones in the cluster are in "fullDisruption" state we stop all evictions. ``` cc @wojtek-t @smarterclayton @davidopp
This commit is contained in:
commit
68def062e2
@ -35,18 +35,34 @@ import (
|
|||||||
"github.com/golang/glog"
|
"github.com/golang/glog"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// LargeClusterThreshold is the number of Nodes a cluster must exceed to be treated as "large".
|
||||||
|
LargeClusterThreshold = 20
|
||||||
|
)
|
||||||
|
|
||||||
// This function is expected to get a slice of NodeReadyConditions for all Nodes in a given zone.
|
// This function is expected to get a slice of NodeReadyConditions for all Nodes in a given zone.
|
||||||
|
// The zone is considered:
|
||||||
|
// - fullyDisrupted if there're no Ready Nodes,
|
||||||
|
// - partiallyDisrupted if more than 1/3 of Nodes (at least 3) are not Ready,
|
||||||
|
// - normal otherwise
|
||||||
func ComputeZoneState(nodeReadyConditions []*api.NodeCondition) zoneState {
|
func ComputeZoneState(nodeReadyConditions []*api.NodeCondition) zoneState {
|
||||||
seenReady := false
|
readyNodes := 0
|
||||||
|
notReadyNodes := 0
|
||||||
for i := range nodeReadyConditions {
|
for i := range nodeReadyConditions {
|
||||||
if nodeReadyConditions[i] != nil && nodeReadyConditions[i].Status == api.ConditionTrue {
|
if nodeReadyConditions[i] != nil && nodeReadyConditions[i].Status == api.ConditionTrue {
|
||||||
seenReady = true
|
readyNodes++
|
||||||
|
} else {
|
||||||
|
notReadyNodes++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if seenReady {
|
switch {
|
||||||
|
case readyNodes == 0 && notReadyNodes > 0:
|
||||||
|
return stateFullDisruption
|
||||||
|
case notReadyNodes > 2 && 2*notReadyNodes > readyNodes:
|
||||||
|
return statePartialDisruption
|
||||||
|
default:
|
||||||
return stateNormal
|
return stateNormal
|
||||||
}
|
}
|
||||||
return stateFullSegmentation
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// cleanupOrphanedPods deletes pods that are bound to nodes that don't
|
// cleanupOrphanedPods deletes pods that are bound to nodes that don't
|
||||||
@ -320,3 +336,15 @@ func terminatePods(kubeClient clientset.Interface, recorder record.EventRecorder
|
|||||||
}
|
}
|
||||||
return complete, nextAttempt, nil
|
return complete, nextAttempt, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HealthyQPSFunc returns defaultQPS unchanged; nodeNum is ignored.
// NewNodeController installs it as the default enterFullDisruptionFunc.
func HealthyQPSFunc(nodeNum int, defaultQPS float32) float32 {
|
||||||
|
return defaultQPS
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReducedQPSFunc returns defaultQPS / 10 when nodeNum is greater than
// LargeClusterThreshold (large cluster: evictions are made slower) and 0
// otherwise (small cluster: evictions are stopped altogether).
// NewNodeController installs it as the default enterPartialDisruptionFunc.
|
||||||
|
func ReducedQPSFunc(nodeNum int, defaultQPS float32) float32 {
|
||||||
|
if nodeNum > LargeClusterThreshold {
|
||||||
|
return defaultQPS / 10
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
@ -67,9 +67,10 @@ const (
|
|||||||
type zoneState string
|
type zoneState string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
stateInitial = zoneState("Initial")
|
||||||
stateNormal = zoneState("Normal")
|
stateNormal = zoneState("Normal")
|
||||||
stateFullSegmentation = zoneState("FullSegmentation")
|
stateFullDisruption = zoneState("FullDisruption")
|
||||||
statePartialSegmentation = zoneState("PartialSegmentation")
|
statePartialDisruption = zoneState("PartialDisruption")
|
||||||
)
|
)
|
||||||
|
|
||||||
type nodeStatusData struct {
|
type nodeStatusData struct {
|
||||||
@ -139,6 +140,8 @@ type NodeController struct {
|
|||||||
forcefullyDeletePod func(*api.Pod) error
|
forcefullyDeletePod func(*api.Pod) error
|
||||||
nodeExistsInCloudProvider func(string) (bool, error)
|
nodeExistsInCloudProvider func(string) (bool, error)
|
||||||
computeZoneStateFunc func(nodeConditions []*api.NodeCondition) zoneState
|
computeZoneStateFunc func(nodeConditions []*api.NodeCondition) zoneState
|
||||||
|
enterPartialDisruptionFunc func(nodeNum int, defaultQPS float32) float32
|
||||||
|
enterFullDisruptionFunc func(nodeNum int, defaultQPS float32) float32
|
||||||
|
|
||||||
zoneStates map[string]zoneState
|
zoneStates map[string]zoneState
|
||||||
|
|
||||||
@ -211,6 +214,8 @@ func NewNodeController(
|
|||||||
allocateNodeCIDRs: allocateNodeCIDRs,
|
allocateNodeCIDRs: allocateNodeCIDRs,
|
||||||
forcefullyDeletePod: func(p *api.Pod) error { return forcefullyDeletePod(kubeClient, p) },
|
forcefullyDeletePod: func(p *api.Pod) error { return forcefullyDeletePod(kubeClient, p) },
|
||||||
nodeExistsInCloudProvider: func(nodeName string) (bool, error) { return nodeExistsInCloudProvider(cloud, nodeName) },
|
nodeExistsInCloudProvider: func(nodeName string) (bool, error) { return nodeExistsInCloudProvider(cloud, nodeName) },
|
||||||
|
enterPartialDisruptionFunc: ReducedQPSFunc,
|
||||||
|
enterFullDisruptionFunc: HealthyQPSFunc,
|
||||||
computeZoneStateFunc: ComputeZoneState,
|
computeZoneStateFunc: ComputeZoneState,
|
||||||
evictionLimiterQPS: evictionLimiterQPS,
|
evictionLimiterQPS: evictionLimiterQPS,
|
||||||
zoneStates: make(map[string]zoneState),
|
zoneStates: make(map[string]zoneState),
|
||||||
@ -491,7 +496,7 @@ func (nc *NodeController) monitorNodeStatus() error {
|
|||||||
"Skipping - no pods will be evicted.", node.Name)
|
"Skipping - no pods will be evicted.", node.Name)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// We do not treat a master node as a part of the cluster for network segmentation checking.
|
// We do not treat a master node as a part of the cluster for network disruption checking.
|
||||||
if !system.IsMasterNode(node) {
|
if !system.IsMasterNode(node) {
|
||||||
zoneToNodeConditions[utilnode.GetZoneKey(node)] = append(zoneToNodeConditions[utilnode.GetZoneKey(node)], currentReadyCondition)
|
zoneToNodeConditions[utilnode.GetZoneKey(node)] = append(zoneToNodeConditions[utilnode.GetZoneKey(node)], currentReadyCondition)
|
||||||
}
|
}
|
||||||
@ -550,37 +555,108 @@ func (nc *NodeController) monitorNodeStatus() error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
nc.handleDisruption(zoneToNodeConditions, nodes)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (nc *NodeController) handleDisruption(zoneToNodeConditions map[string][]*api.NodeCondition, nodes *api.NodeList) {
|
||||||
|
newZoneStates := map[string]zoneState{}
|
||||||
|
allAreFullyDisrupted := true
|
||||||
for k, v := range zoneToNodeConditions {
|
for k, v := range zoneToNodeConditions {
|
||||||
newState := nc.computeZoneStateFunc(v)
|
newState := nc.computeZoneStateFunc(v)
|
||||||
if newState == nc.zoneStates[k] {
|
if newState != stateFullDisruption {
|
||||||
|
allAreFullyDisrupted = false
|
||||||
|
}
|
||||||
|
newZoneStates[k] = newState
|
||||||
|
if _, had := nc.zoneStates[k]; !had {
|
||||||
|
nc.zoneStates[k] = stateInitial
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
allWasFullyDisrupted := true
|
||||||
|
for k, v := range nc.zoneStates {
|
||||||
|
if _, have := zoneToNodeConditions[k]; !have {
|
||||||
|
delete(nc.zoneStates, k)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if newState == stateFullSegmentation {
|
if v != stateFullDisruption {
|
||||||
glog.V(2).Infof("NodeController is entering network segmentation mode in zone %v.", k)
|
allWasFullyDisrupted = false
|
||||||
} else if newState == stateNormal {
|
break
|
||||||
glog.V(2).Infof("NodeController exited network segmentation mode in zone %v.", k)
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// At least one node was responding in the previous pass or in the current pass. The semantics are as follows:
|
||||||
|
// - if the new state is "partialDisruption" we call a user defined function that returns a new QPS to use for the limiter,
|
||||||
|
// - if the new state is "normal" we resume normal operation (go back to default limiter settings),
|
||||||
|
// - if new state is "fullDisruption" we restore normal eviction rate,
|
||||||
|
// - unless all zones in the cluster are in "fullDisruption" - in that case we stop all evictions.
|
||||||
|
if !allAreFullyDisrupted || !allWasFullyDisrupted {
|
||||||
|
// We're switching to full disruption mode
|
||||||
|
if allAreFullyDisrupted {
|
||||||
|
glog.V(0).Info("NodeController detected that all Nodes are not-Ready. Entering master disruption mode.")
|
||||||
for i := range nodes.Items {
|
for i := range nodes.Items {
|
||||||
if utilnode.GetZoneKey(&nodes.Items[i]) == k {
|
|
||||||
if newState == stateFullSegmentation {
|
|
||||||
// When zone is fully segmented we stop the eviction all together.
|
|
||||||
nc.cancelPodEviction(&nodes.Items[i])
|
nc.cancelPodEviction(&nodes.Items[i])
|
||||||
}
|
}
|
||||||
if newState == stateNormal && nc.zoneStates[k] == stateFullSegmentation {
|
// We stop all evictions.
|
||||||
// When exiting segmentation mode update probe timestamps on all Nodes.
|
for k := range nc.zonePodEvictor {
|
||||||
|
nc.zonePodEvictor[k].SwapLimiter(0)
|
||||||
|
nc.zoneTerminationEvictor[k].SwapLimiter(0)
|
||||||
|
}
|
||||||
|
for k := range nc.zoneStates {
|
||||||
|
nc.zoneStates[k] = stateFullDisruption
|
||||||
|
}
|
||||||
|
// All rate limiters are updated, so we can return early here.
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// We're exiting full disruption mode
|
||||||
|
if allWasFullyDisrupted {
|
||||||
|
glog.V(0).Info("NodeController detected that some Nodes are Ready. Exiting master disruption mode.")
|
||||||
|
// When exiting disruption mode update probe timestamps on all Nodes.
|
||||||
now := nc.now()
|
now := nc.now()
|
||||||
|
for i := range nodes.Items {
|
||||||
v := nc.nodeStatusMap[nodes.Items[i].Name]
|
v := nc.nodeStatusMap[nodes.Items[i].Name]
|
||||||
v.probeTimestamp = now
|
v.probeTimestamp = now
|
||||||
v.readyTransitionTimestamp = now
|
v.readyTransitionTimestamp = now
|
||||||
nc.nodeStatusMap[nodes.Items[i].Name] = v
|
nc.nodeStatusMap[nodes.Items[i].Name] = v
|
||||||
}
|
}
|
||||||
|
// We reset all rate limiters to settings appropriate for the given state.
|
||||||
|
for k := range nc.zonePodEvictor {
|
||||||
|
nc.setLimiterInZone(k, len(zoneToNodeConditions[k]), newZoneStates[k])
|
||||||
|
nc.zoneStates[k] = newZoneStates[k]
|
||||||
}
|
}
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
// We know that there's at least one zone that is not fully disrupted, so
|
||||||
|
// we can use default behavior for rate limiters
|
||||||
|
for k, v := range nc.zoneStates {
|
||||||
|
newState := newZoneStates[k]
|
||||||
|
if v == newState {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
glog.V(0).Infof("NodeController detected that zone %v is now in state %v.", k, newState)
|
||||||
|
nc.setLimiterInZone(k, len(zoneToNodeConditions[k]), newState)
|
||||||
nc.zoneStates[k] = newState
|
nc.zoneStates[k] = newState
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
// setLimiterInZone calls SwapLimiter on both the pod evictor and the termination
// evictor of the given zone, choosing the QPS from state:
//   - stateNormal: nc.evictionLimiterQPS,
//   - statePartialDisruption: nc.enterPartialDisruptionFunc(zoneSize, nc.evictionLimiterQPS),
//   - stateFullDisruption: nc.enterFullDisruptionFunc(zoneSize, nc.evictionLimiterQPS).
// Any other state (e.g. stateInitial) matches no case and leaves both limiters untouched.
// NOTE(review): assumes both evictor maps already hold an entry for zone - confirm against callers.
func (nc *NodeController) setLimiterInZone(zone string, zoneSize int, state zoneState) {
|
||||||
|
switch state {
|
||||||
|
case stateNormal:
|
||||||
|
nc.zonePodEvictor[zone].SwapLimiter(nc.evictionLimiterQPS)
|
||||||
|
nc.zoneTerminationEvictor[zone].SwapLimiter(nc.evictionLimiterQPS)
|
||||||
|
case statePartialDisruption:
|
||||||
|
nc.zonePodEvictor[zone].SwapLimiter(
|
||||||
|
nc.enterPartialDisruptionFunc(zoneSize, nc.evictionLimiterQPS))
|
||||||
|
nc.zoneTerminationEvictor[zone].SwapLimiter(
|
||||||
|
nc.enterPartialDisruptionFunc(zoneSize, nc.evictionLimiterQPS))
|
||||||
|
case stateFullDisruption:
|
||||||
|
nc.zonePodEvictor[zone].SwapLimiter(
|
||||||
|
nc.enterFullDisruptionFunc(zoneSize, nc.evictionLimiterQPS))
|
||||||
|
nc.zoneTerminationEvictor[zone].SwapLimiter(
|
||||||
|
nc.enterFullDisruptionFunc(zoneSize, nc.evictionLimiterQPS))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// For a given node checks its conditions and tries to update it. Returns grace period to which given node
|
// For a given node checks its conditions and tries to update it. Returns grace period to which given node
|
||||||
@ -791,16 +867,5 @@ func (nc *NodeController) cancelPodEviction(node *api.Node) bool {
|
|||||||
func (nc *NodeController) evictPods(node *api.Node) bool {
|
func (nc *NodeController) evictPods(node *api.Node) bool {
|
||||||
nc.evictorLock.Lock()
|
nc.evictorLock.Lock()
|
||||||
defer nc.evictorLock.Unlock()
|
defer nc.evictorLock.Unlock()
|
||||||
foundHealty := false
|
return nc.zonePodEvictor[utilnode.GetZoneKey(node)].Add(node.Name)
|
||||||
for _, state := range nc.zoneStates {
|
|
||||||
if state != stateFullSegmentation {
|
|
||||||
foundHealty = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if !foundHealty {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
zone := utilnode.GetZoneKey(node)
|
|
||||||
return nc.zonePodEvictor[zone].Add(node.Name)
|
|
||||||
}
|
}
|
||||||
|
@ -35,7 +35,6 @@ const (
|
|||||||
testNodeMonitorGracePeriod = 40 * time.Second
|
testNodeMonitorGracePeriod = 40 * time.Second
|
||||||
testNodeStartupGracePeriod = 60 * time.Second
|
testNodeStartupGracePeriod = 60 * time.Second
|
||||||
testNodeMonitorPeriod = 5 * time.Second
|
testNodeMonitorPeriod = 5 * time.Second
|
||||||
testRateLimiterBurst = 10000
|
|
||||||
testRateLimiterQPS = float32(10000)
|
testRateLimiterQPS = float32(10000)
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -458,145 +457,6 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
|
|||||||
expectedEvictPods: true,
|
expectedEvictPods: true,
|
||||||
description: "Node created long time ago, node controller posted Unknown for a long period of time.",
|
description: "Node created long time ago, node controller posted Unknown for a long period of time.",
|
||||||
},
|
},
|
||||||
// NetworkSegmentation: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
|
|
||||||
{
|
|
||||||
fakeNodeHandler: &FakeNodeHandler{
|
|
||||||
Existing: []*api.Node{
|
|
||||||
{
|
|
||||||
ObjectMeta: api.ObjectMeta{
|
|
||||||
Name: "node0",
|
|
||||||
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
|
||||||
Labels: map[string]string{
|
|
||||||
unversioned.LabelZoneRegion: "region1",
|
|
||||||
unversioned.LabelZoneFailureDomain: "zone1",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Status: api.NodeStatus{
|
|
||||||
Conditions: []api.NodeCondition{
|
|
||||||
{
|
|
||||||
Type: api.NodeReady,
|
|
||||||
Status: api.ConditionUnknown,
|
|
||||||
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
|
||||||
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ObjectMeta: api.ObjectMeta{
|
|
||||||
Name: "node1",
|
|
||||||
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
|
||||||
Labels: map[string]string{
|
|
||||||
unversioned.LabelZoneRegion: "region2",
|
|
||||||
unversioned.LabelZoneFailureDomain: "zone2",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Status: api.NodeStatus{
|
|
||||||
Conditions: []api.NodeCondition{
|
|
||||||
{
|
|
||||||
Type: api.NodeReady,
|
|
||||||
Status: api.ConditionUnknown,
|
|
||||||
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
|
||||||
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
|
|
||||||
},
|
|
||||||
daemonSets: nil,
|
|
||||||
timeToPass: 60 * time.Minute,
|
|
||||||
newNodeStatus: api.NodeStatus{
|
|
||||||
Conditions: []api.NodeCondition{
|
|
||||||
{
|
|
||||||
Type: api.NodeReady,
|
|
||||||
Status: api.ConditionUnknown,
|
|
||||||
// Node status was updated by nodecontroller 1hr ago
|
|
||||||
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
|
||||||
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
secondNodeNewStatus: api.NodeStatus{
|
|
||||||
Conditions: []api.NodeCondition{
|
|
||||||
{
|
|
||||||
Type: api.NodeReady,
|
|
||||||
Status: api.ConditionUnknown,
|
|
||||||
// Node status was updated by nodecontroller 1hr ago
|
|
||||||
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
|
||||||
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
expectedEvictPods: false,
|
|
||||||
description: "Network Segmentation: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.",
|
|
||||||
},
|
|
||||||
// NetworkSegmentation: Node created long time ago, node controller posted Unknown for a long period
|
|
||||||
// of on first Node, eviction should stop even though -master Node is healthy.
|
|
||||||
{
|
|
||||||
fakeNodeHandler: &FakeNodeHandler{
|
|
||||||
Existing: []*api.Node{
|
|
||||||
{
|
|
||||||
ObjectMeta: api.ObjectMeta{
|
|
||||||
Name: "node0",
|
|
||||||
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
|
||||||
Labels: map[string]string{
|
|
||||||
unversioned.LabelZoneRegion: "region1",
|
|
||||||
unversioned.LabelZoneFailureDomain: "zone1",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Status: api.NodeStatus{
|
|
||||||
Conditions: []api.NodeCondition{
|
|
||||||
{
|
|
||||||
Type: api.NodeReady,
|
|
||||||
Status: api.ConditionUnknown,
|
|
||||||
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
|
||||||
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ObjectMeta: api.ObjectMeta{
|
|
||||||
Name: "node-master",
|
|
||||||
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
|
||||||
Labels: map[string]string{
|
|
||||||
unversioned.LabelZoneRegion: "region1",
|
|
||||||
unversioned.LabelZoneFailureDomain: "zone1",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Status: api.NodeStatus{
|
|
||||||
Conditions: []api.NodeCondition{
|
|
||||||
{
|
|
||||||
Type: api.NodeReady,
|
|
||||||
Status: api.ConditionTrue,
|
|
||||||
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
|
||||||
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
|
|
||||||
},
|
|
||||||
daemonSets: nil,
|
|
||||||
timeToPass: 60 * time.Minute,
|
|
||||||
newNodeStatus: api.NodeStatus{
|
|
||||||
Conditions: []api.NodeCondition{
|
|
||||||
{
|
|
||||||
Type: api.NodeReady,
|
|
||||||
Status: api.ConditionUnknown,
|
|
||||||
// Node status was updated by nodecontroller 1hr ago
|
|
||||||
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
|
||||||
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
secondNodeNewStatus: healthyNodeNewStatus,
|
|
||||||
expectedEvictPods: false,
|
|
||||||
description: "NetworkSegmentation: Node created long time ago, node controller posted Unknown for a long period of on first Node, eviction should stop even though -master Node is healthy",
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, item := range table {
|
for _, item := range table {
|
||||||
@ -647,6 +507,539 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) {
|
||||||
|
fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
|
||||||
|
evictionTimeout := 10 * time.Minute
|
||||||
|
timeToPass := 60 * time.Minute
|
||||||
|
|
||||||
|
// Because of the logic that prevents NC from evicting anything when all Nodes are NotReady
|
||||||
|
// we need a second healthy node in tests. Because of how the tests are written, we need to update
|
||||||
|
// the status of this Node.
|
||||||
|
healthyNodeNewStatus := api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionTrue,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 13, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
unhealthyNodeNewStatus := api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionUnknown,
|
||||||
|
// Node status was updated by nodecontroller 1hr ago
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
table := []struct {
|
||||||
|
nodeList []*api.Node
|
||||||
|
podList []api.Pod
|
||||||
|
updatedNodeStatuses []api.NodeStatus
|
||||||
|
expectedInitialStates map[string]zoneState
|
||||||
|
expectedFollowingStates map[string]zoneState
|
||||||
|
expectedEvictPods bool
|
||||||
|
description string
|
||||||
|
}{
|
||||||
|
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
|
||||||
|
// Only zone is down - eviction shouldn't take place
|
||||||
|
{
|
||||||
|
nodeList: []*api.Node{
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node0",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region1",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionUnknown,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node1",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region1",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionUnknown,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
podList: []api.Pod{*newPod("pod0", "node0")},
|
||||||
|
updatedNodeStatuses: []api.NodeStatus{
|
||||||
|
unhealthyNodeNewStatus,
|
||||||
|
unhealthyNodeNewStatus,
|
||||||
|
},
|
||||||
|
expectedInitialStates: map[string]zoneState{createZoneID("region1", "zone1"): stateFullDisruption},
|
||||||
|
expectedFollowingStates: map[string]zoneState{createZoneID("region1", "zone1"): stateFullDisruption},
|
||||||
|
expectedEvictPods: false,
|
||||||
|
description: "Network Disruption: Only zone is down - eviction shouldn't take place.",
|
||||||
|
},
|
||||||
|
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
|
||||||
|
// Both zones down - eviction shouldn't take place
|
||||||
|
{
|
||||||
|
nodeList: []*api.Node{
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node0",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region1",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionUnknown,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node1",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region2",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone2",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionUnknown,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
|
podList: []api.Pod{*newPod("pod0", "node0")},
|
||||||
|
updatedNodeStatuses: []api.NodeStatus{
|
||||||
|
unhealthyNodeNewStatus,
|
||||||
|
unhealthyNodeNewStatus,
|
||||||
|
},
|
||||||
|
expectedInitialStates: map[string]zoneState{
|
||||||
|
createZoneID("region1", "zone1"): stateFullDisruption,
|
||||||
|
createZoneID("region2", "zone2"): stateFullDisruption,
|
||||||
|
},
|
||||||
|
expectedFollowingStates: map[string]zoneState{
|
||||||
|
createZoneID("region1", "zone1"): stateFullDisruption,
|
||||||
|
createZoneID("region2", "zone2"): stateFullDisruption,
|
||||||
|
},
|
||||||
|
expectedEvictPods: false,
|
||||||
|
description: "Network Disruption: Both zones down - eviction shouldn't take place.",
|
||||||
|
},
|
||||||
|
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
|
||||||
|
// One zone is down - eviction should take place
|
||||||
|
{
|
||||||
|
nodeList: []*api.Node{
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node0",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region1",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionUnknown,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node1",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region1",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone2",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionTrue,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
podList: []api.Pod{*newPod("pod0", "node0")},
|
||||||
|
updatedNodeStatuses: []api.NodeStatus{
|
||||||
|
unhealthyNodeNewStatus,
|
||||||
|
healthyNodeNewStatus,
|
||||||
|
},
|
||||||
|
expectedInitialStates: map[string]zoneState{
|
||||||
|
createZoneID("region1", "zone1"): stateFullDisruption,
|
||||||
|
createZoneID("region1", "zone2"): stateNormal,
|
||||||
|
},
|
||||||
|
expectedFollowingStates: map[string]zoneState{
|
||||||
|
createZoneID("region1", "zone1"): stateFullDisruption,
|
||||||
|
createZoneID("region1", "zone2"): stateNormal,
|
||||||
|
},
|
||||||
|
expectedEvictPods: true,
|
||||||
|
description: "Network Disruption: One zone is down - eviction should take place.",
|
||||||
|
},
|
||||||
|
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period
|
||||||
|
// of time on the first Node, eviction should stop even though -master Node is healthy.
|
||||||
|
{
|
||||||
|
nodeList: []*api.Node{
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node0",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region1",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionUnknown,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node-master",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region1",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionTrue,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
podList: []api.Pod{*newPod("pod0", "node0")},
|
||||||
|
updatedNodeStatuses: []api.NodeStatus{
|
||||||
|
unhealthyNodeNewStatus,
|
||||||
|
healthyNodeNewStatus,
|
||||||
|
},
|
||||||
|
expectedInitialStates: map[string]zoneState{
|
||||||
|
createZoneID("region1", "zone1"): stateFullDisruption,
|
||||||
|
},
|
||||||
|
expectedFollowingStates: map[string]zoneState{
|
||||||
|
createZoneID("region1", "zone1"): stateFullDisruption,
|
||||||
|
},
|
||||||
|
expectedEvictPods: false,
|
||||||
|
description: "NetworkDisruption: eviction should stop, only -master Node is healthy",
|
||||||
|
},
|
||||||
|
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
|
||||||
|
// Initially both zones down, one comes back - eviction should take place
|
||||||
|
{
|
||||||
|
nodeList: []*api.Node{
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node0",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region1",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionUnknown,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node1",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region1",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone2",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionUnknown,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
|
podList: []api.Pod{*newPod("pod0", "node0")},
|
||||||
|
updatedNodeStatuses: []api.NodeStatus{
|
||||||
|
unhealthyNodeNewStatus,
|
||||||
|
healthyNodeNewStatus,
|
||||||
|
},
|
||||||
|
expectedInitialStates: map[string]zoneState{
|
||||||
|
createZoneID("region1", "zone1"): stateFullDisruption,
|
||||||
|
createZoneID("region1", "zone2"): stateFullDisruption,
|
||||||
|
},
|
||||||
|
expectedFollowingStates: map[string]zoneState{
|
||||||
|
createZoneID("region1", "zone1"): stateFullDisruption,
|
||||||
|
createZoneID("region1", "zone2"): stateNormal,
|
||||||
|
},
|
||||||
|
expectedEvictPods: true,
|
||||||
|
description: "Initially both zones down, one comes back - eviction should take place",
|
||||||
|
},
|
||||||
|
// NetworkDisruption: Nodes created a long time ago, node controller posted Unknown for a long period of time on three of the five Nodes.
|
||||||
|
// Zone is partially disrupted - eviction should take place
|
||||||
|
{
|
||||||
|
nodeList: []*api.Node{
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node0",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region1",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionUnknown,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node1",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region1",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionUnknown,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node2",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region1",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionUnknown,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node3",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region1",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionTrue,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: "node4",
|
||||||
|
CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
Labels: map[string]string{
|
||||||
|
unversioned.LabelZoneRegion: "region1",
|
||||||
|
unversioned.LabelZoneFailureDomain: "zone1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: api.NodeStatus{
|
||||||
|
Conditions: []api.NodeCondition{
|
||||||
|
{
|
||||||
|
Type: api.NodeReady,
|
||||||
|
Status: api.ConditionTrue,
|
||||||
|
LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
|
podList: []api.Pod{*newPod("pod0", "node0")},
|
||||||
|
updatedNodeStatuses: []api.NodeStatus{
|
||||||
|
unhealthyNodeNewStatus,
|
||||||
|
unhealthyNodeNewStatus,
|
||||||
|
unhealthyNodeNewStatus,
|
||||||
|
healthyNodeNewStatus,
|
||||||
|
healthyNodeNewStatus,
|
||||||
|
},
|
||||||
|
expectedInitialStates: map[string]zoneState{
|
||||||
|
createZoneID("region1", "zone1"): statePartialDisruption,
|
||||||
|
},
|
||||||
|
expectedFollowingStates: map[string]zoneState{
|
||||||
|
createZoneID("region1", "zone1"): statePartialDisruption,
|
||||||
|
},
|
||||||
|
expectedEvictPods: true,
|
||||||
|
description: "Zone is partially disrupted - eviction should take place.",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, item := range table {
|
||||||
|
fakeNodeHandler := &FakeNodeHandler{
|
||||||
|
Existing: item.nodeList,
|
||||||
|
Clientset: fake.NewSimpleClientset(&api.PodList{Items: item.podList}),
|
||||||
|
}
|
||||||
|
nodeController, _ := NewNodeControllerFromClient(nil, fakeNodeHandler,
|
||||||
|
evictionTimeout, testRateLimiterQPS, testNodeMonitorGracePeriod,
|
||||||
|
testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
|
||||||
|
nodeController.now = func() unversioned.Time { return fakeNow }
|
||||||
|
nodeController.enterPartialDisruptionFunc = func(nodeNum int, defaultQPS float32) float32 {
|
||||||
|
return testRateLimiterQPS
|
||||||
|
}
|
||||||
|
nodeController.enterFullDisruptionFunc = func(nodeNum int, defaultQPS float32) float32 {
|
||||||
|
return testRateLimiterQPS
|
||||||
|
}
|
||||||
|
if err := nodeController.monitorNodeStatus(); err != nil {
|
||||||
|
t.Errorf("%v: unexpected error: %v", item.description, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for zone, state := range item.expectedInitialStates {
|
||||||
|
if state != nodeController.zoneStates[zone] {
|
||||||
|
t.Errorf("%v: Unexpected zone state: %v: %v instead %v", item.description, zone, nodeController.zoneStates[zone], state)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeController.now = func() unversioned.Time { return unversioned.Time{Time: fakeNow.Add(timeToPass)} }
|
||||||
|
for i := range item.updatedNodeStatuses {
|
||||||
|
fakeNodeHandler.Existing[i].Status = item.updatedNodeStatuses[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := nodeController.monitorNodeStatus(); err != nil {
|
||||||
|
t.Errorf("%v: unexpected error: %v", item.description, err)
|
||||||
|
}
|
||||||
|
// Give some time for rate-limiter to reload
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
|
||||||
|
for zone, state := range item.expectedFollowingStates {
|
||||||
|
if state != nodeController.zoneStates[zone] {
|
||||||
|
t.Errorf("%v: Unexpected zone state: %v: %v instead %v", item.description, zone, nodeController.zoneStates[zone], state)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
zones := getZones(fakeNodeHandler)
|
||||||
|
for _, zone := range zones {
|
||||||
|
nodeController.zonePodEvictor[zone].Try(func(value TimedValue) (bool, time.Duration) {
|
||||||
|
remaining, _ := deletePods(fakeNodeHandler, nodeController.recorder, value.Value, nodeController.daemonSetStore)
|
||||||
|
if remaining {
|
||||||
|
nodeController.zoneTerminationEvictor[zone].Add(value.Value)
|
||||||
|
}
|
||||||
|
return true, 0
|
||||||
|
})
|
||||||
|
nodeController.zonePodEvictor[zone].Try(func(value TimedValue) (bool, time.Duration) {
|
||||||
|
terminatePods(fakeNodeHandler, nodeController.recorder, value.Value, value.AddedAt, nodeController.maximumGracePeriod)
|
||||||
|
return true, 0
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
podEvicted := false
|
||||||
|
for _, action := range fakeNodeHandler.Actions() {
|
||||||
|
if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
|
||||||
|
podEvicted = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if item.expectedEvictPods != podEvicted {
|
||||||
|
t.Errorf("%v: expected pod eviction: %+v, got %+v", item.description, item.expectedEvictPods, podEvicted)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TestCloudProviderNoRateLimit tests that monitorNodes() immediately deletes
|
// TestCloudProviderNoRateLimit tests that monitorNodes() immediately deletes
|
||||||
// pods and the node when kubelet has not reported, and the cloudprovider says
|
// pods and the node when kubelet has not reported, and the cloudprovider says
|
||||||
// the node is gone.
|
// the node is gone.
|
||||||
|
@ -21,9 +21,10 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/golang/glog"
|
|
||||||
"k8s.io/kubernetes/pkg/util/flowcontrol"
|
"k8s.io/kubernetes/pkg/util/flowcontrol"
|
||||||
"k8s.io/kubernetes/pkg/util/sets"
|
"k8s.io/kubernetes/pkg/util/sets"
|
||||||
|
|
||||||
|
"github.com/golang/glog"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TimedValue is a value that should be processed at a designated time.
|
// TimedValue is a value that should be processed at a designated time.
|
||||||
@ -179,7 +180,7 @@ func (q *RateLimitedTimedQueue) Try(fn ActionFunc) {
|
|||||||
for ok {
|
for ok {
|
||||||
// rate limit the queue checking
|
// rate limit the queue checking
|
||||||
if !q.limiter.TryAccept() {
|
if !q.limiter.TryAccept() {
|
||||||
glog.V(10).Info("Try rate limited...")
|
glog.V(10).Infof("Try rate limited for value: %v", val)
|
||||||
// Try again later
|
// Try again later
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
@ -247,3 +247,7 @@ func getZones(nodeHandler *FakeNodeHandler) []string {
|
|||||||
}
|
}
|
||||||
return zones.List()
|
return zones.List()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// createZoneID returns the zone identifier used by the tests as a key for
// per-zone state: the region and the zone joined by a separator that embeds
// a NUL byte.
func createZoneID(region, zone string) string {
	const separator = ":\x00:"
	return region + separator + zone
}
|
||||||
|
Loading…
Reference in New Issue
Block a user