Merge pull request #115840 from atosatto/remove-taint-manager-cli

Remove enable-taint-manager and pod-eviction-timeout CLI flags
Kubernetes Prow Robot 2023-03-13 08:13:10 -07:00 committed by GitHub
commit a0b1bee7c5
15 changed files with 63 additions and 1025 deletions
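With this change the taint manager is always on, so NoExecute taint-based eviction is the only eviction path: the node lifecycle controller taints NotReady and unreachable nodes, and the taint manager evicts pods that do not tolerate those taints. The cluster-wide --pod-eviction-timeout disappears with it; the per-pod equivalent is tolerationSeconds on the node.kubernetes.io/not-ready and node.kubernetes.io/unreachable NoExecute taints, which the DefaultTolerationSeconds admission plugin normally injects at 300s. A minimal Go sketch of expressing that timeout per pod (the five-minute value is illustrative, not taken from this commit):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/utils/pointer"
)

func main() {
	// Instead of a global pod-eviction-timeout, each pod (or a defaulting
	// admission plugin) declares how long it tolerates the NoExecute taints
	// that the node lifecycle controller places on unhealthy nodes.
	tolerations := []v1.Toleration{
		{
			Key:               v1.TaintNodeNotReady, // "node.kubernetes.io/not-ready"
			Operator:          v1.TolerationOpExists,
			Effect:            v1.TaintEffectNoExecute,
			TolerationSeconds: pointer.Int64(300), // illustrative: evict after 5 minutes
		},
		{
			Key:               v1.TaintNodeUnreachable, // "node.kubernetes.io/unreachable"
			Operator:          v1.TolerationOpExists,
			Effect:            v1.TaintEffectNoExecute,
			TolerationSeconds: pointer.Int64(300),
		},
	}
	fmt.Println(tolerations)
}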

View File

@@ -537,7 +537,6 @@ API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,N
API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,NodeIPAMControllerConfiguration,NodeCIDRMaskSizeIPv6
API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,NodeIPAMControllerConfiguration,SecondaryServiceCIDR
API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,NodeIPAMControllerConfiguration,ServiceCIDR
API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,NodeLifecycleControllerConfiguration,EnableTaintManager
API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,NodeLifecycleControllerConfiguration,LargeClusterSizeThreshold
API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,NodeLifecycleControllerConfiguration,NodeEvictionRate
API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,NodeLifecycleControllerConfiguration,NodeMonitorGracePeriod

View File

@@ -186,12 +186,10 @@ func startNodeLifecycleController(ctx context.Context, controllerContext Control
controllerContext.ComponentConfig.KubeCloudShared.NodeMonitorPeriod.Duration,
controllerContext.ComponentConfig.NodeLifecycleController.NodeStartupGracePeriod.Duration,
controllerContext.ComponentConfig.NodeLifecycleController.NodeMonitorGracePeriod.Duration,
controllerContext.ComponentConfig.NodeLifecycleController.PodEvictionTimeout.Duration,
controllerContext.ComponentConfig.NodeLifecycleController.NodeEvictionRate,
controllerContext.ComponentConfig.NodeLifecycleController.SecondaryNodeEvictionRate,
controllerContext.ComponentConfig.NodeLifecycleController.LargeClusterSizeThreshold,
controllerContext.ComponentConfig.NodeLifecycleController.UnhealthyZoneThreshold,
controllerContext.ComponentConfig.NodeLifecycleController.EnableTaintManager,
)
if err != nil {
return nil, true, err

View File

@@ -39,14 +39,10 @@ func (o *NodeLifecycleControllerOptions) AddFlags(fs *pflag.FlagSet) {
"Amount of time which we allow running Node to be unresponsive before marking it unhealthy. "+
"Must be N times more than kubelet's nodeStatusUpdateFrequency, "+
"where N means number of retries allowed for kubelet to post node status.")
fs.DurationVar(&o.PodEvictionTimeout.Duration, "pod-eviction-timeout", o.PodEvictionTimeout.Duration, "The grace period for deleting pods on failed nodes.")
fs.Float32Var(&o.NodeEvictionRate, "node-eviction-rate", 0.1, "Number of nodes per second on which pods are deleted in case of node failure when a zone is healthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters.")
fs.Float32Var(&o.SecondaryNodeEvictionRate, "secondary-node-eviction-rate", 0.01, "Number of nodes per second on which pods are deleted in case of node failure when a zone is unhealthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters. This value is implicitly overridden to 0 if the cluster size is smaller than --large-cluster-size-threshold.")
fs.Int32Var(&o.LargeClusterSizeThreshold, "large-cluster-size-threshold", 50, "Number of nodes from which NodeController treats the cluster as large for the eviction logic purposes. --secondary-node-eviction-rate is implicitly overridden to 0 for clusters this size or smaller.")
fs.Float32Var(&o.UnhealthyZoneThreshold, "unhealthy-zone-threshold", 0.55, "Fraction of Nodes in a zone which needs to be not Ready (minimum 3) for zone to be treated as unhealthy. ")
fs.BoolVar(&o.EnableTaintManager, "enable-taint-manager", o.EnableTaintManager, "If set to true enables NoExecute Taints and will evict all not-tolerating Pod running on Nodes tainted with this kind of Taints.")
fs.MarkDeprecated("enable-taint-manager", "This flag is deprecated and it will be removed in 1.27. The taint-manager is enabled by default and will remain implicitly enabled once this flag is removed.")
fs.MarkDeprecated("pod-eviction-timeout", "This flag is deprecated and it will be removed in 1.27. Once taint manager is enabled, this flag has no effect.")
}
// ApplyTo fills up NodeLifecycleController config with options.
@@ -55,10 +51,8 @@ func (o *NodeLifecycleControllerOptions) ApplyTo(cfg *nodelifecycleconfig.NodeLi
return nil
}
cfg.EnableTaintManager = o.EnableTaintManager
cfg.NodeStartupGracePeriod = o.NodeStartupGracePeriod
cfg.NodeMonitorGracePeriod = o.NodeMonitorGracePeriod
cfg.PodEvictionTimeout = o.PodEvictionTimeout
cfg.NodeEvictionRate = o.NodeEvictionRate
cfg.SecondaryNodeEvictionRate = o.SecondaryNodeEvictionRate
cfg.LargeClusterSizeThreshold = o.LargeClusterSizeThreshold
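The two fs.MarkDeprecated calls deleted above are how the previous release announced this removal; deleting the fs.BoolVar and fs.DurationVar registrations now turns the flags into hard startup errors. For reference, a standalone sketch of that pflag deprecate-then-remove pattern (the flag set name, default, and message wording are illustrative):

package main

import (
	"fmt"
	"time"

	"github.com/spf13/pflag"
)

func main() {
	fs := pflag.NewFlagSet("kube-controller-manager", pflag.ContinueOnError)

	// Registering the flag and marking it deprecated keeps it parseable for a
	// release while warning users; this commit retires that state by removing
	// the registration entirely.
	var podEvictionTimeout time.Duration
	fs.DurationVar(&podEvictionTimeout, "pod-eviction-timeout", 5*time.Minute,
		"The grace period for deleting pods on failed nodes.")
	if err := fs.MarkDeprecated("pod-eviction-timeout",
		"this flag has no effect once the taint manager is always enabled"); err != nil {
		fmt.Println(err)
	}

	_ = fs.Parse([]string{"--pod-eviction-timeout=2m"}) // pflag prints a deprecation warning
	fmt.Println(podEvictionTimeout)
}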

View File

@@ -107,7 +107,6 @@ var args = []string{
"--enable-dynamic-provisioning=false",
"--enable-garbage-collector=false",
"--enable-hostpath-provisioner=true",
"--enable-taint-manager=false",
"--cluster-signing-duration=10h",
"--flex-volume-plugin-dir=/flex-volume-plugin",
"--volume-host-cidr-denylist=127.0.0.1/28,feed::/16",
@@ -142,7 +141,6 @@ var args = []string{
"--node-monitor-grace-period=30s",
"--node-monitor-period=10s",
"--node-startup-grace-period=30s",
"--pod-eviction-timeout=2m",
"--profiling=false",
"--pv-recycler-increment-timeout-nfs=45",
"--pv-recycler-minimum-timeout-hostpath=45",
@@ -344,12 +342,10 @@ func TestAddFlags(t *testing.T) {
},
NodeLifecycleController: &NodeLifecycleControllerOptions{
&nodelifecycleconfig.NodeLifecycleControllerConfiguration{
EnableTaintManager: false,
NodeEvictionRate: 0.2,
SecondaryNodeEvictionRate: 0.05,
NodeMonitorGracePeriod: metav1.Duration{Duration: 30 * time.Second},
NodeStartupGracePeriod: metav1.Duration{Duration: 30 * time.Second},
PodEvictionTimeout: metav1.Duration{Duration: 2 * time.Minute},
LargeClusterSizeThreshold: 100,
UnhealthyZoneThreshold: 0.6,
},
@@ -589,12 +585,10 @@ func TestApplyTo(t *testing.T) {
NodeCIDRMaskSizeIPv6: 108,
},
NodeLifecycleController: nodelifecycleconfig.NodeLifecycleControllerConfiguration{
EnableTaintManager: false,
NodeEvictionRate: 0.2,
SecondaryNodeEvictionRate: 0.05,
NodeMonitorGracePeriod: metav1.Duration{Duration: 30 * time.Second},
NodeStartupGracePeriod: metav1.Duration{Duration: 30 * time.Second},
PodEvictionTimeout: metav1.Duration{Duration: 2 * time.Minute},
LargeClusterSizeThreshold: 100,
UnhealthyZoneThreshold: 0.6,
},
@@ -1164,12 +1158,10 @@ func TestValidateControllersOptions(t *testing.T) {
expectErrors: false,
validate: (&NodeLifecycleControllerOptions{
&nodelifecycleconfig.NodeLifecycleControllerConfiguration{
EnableTaintManager: false,
NodeEvictionRate: 0.2,
SecondaryNodeEvictionRate: 0.05,
NodeMonitorGracePeriod: metav1.Duration{Duration: 30 * time.Second},
NodeStartupGracePeriod: metav1.Duration{Duration: 30 * time.Second},
PodEvictionTimeout: metav1.Duration{Duration: 2 * time.Minute},
LargeClusterSizeThreshold: 100,
UnhealthyZoneThreshold: 0.6,
},

View File

@@ -22,9 +22,6 @@ import (
// NodeLifecycleControllerConfiguration contains elements describing NodeLifecycleController.
type NodeLifecycleControllerConfiguration struct {
// If set to true enables NoExecute Taints and will evict all not-tolerating
// Pod running on Nodes tainted with this kind of Taints.
EnableTaintManager bool
// nodeEvictionRate is the number of nodes per second on which pods are deleted in case of node failure when a zone is healthy
NodeEvictionRate float32
// secondaryNodeEvictionRate is the number of nodes per second on which pods are deleted in case of node failure when a zone is unhealthy
@@ -37,8 +34,6 @@ type NodeLifecycleControllerConfiguration struct {
// nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet
// to post node status.
NodeMonitorGracePeriod metav1.Duration
// podEvictionTimeout is the grace period for deleting pods on failed nodes.
PodEvictionTimeout metav1.Duration
// secondaryNodeEvictionRate is implicitly overridden to 0 for clusters smaller than or equal to largeClusterSizeThreshold
LargeClusterSizeThreshold int32
// Zone is treated as unhealthy in nodeEvictionRate and secondaryNodeEvictionRate when at least

View File

@@ -21,7 +21,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubectrlmgrconfigv1alpha1 "k8s.io/kube-controller-manager/config/v1alpha1"
"k8s.io/utils/pointer"
)
// RecommendedDefaultNodeLifecycleControllerConfiguration defaults a pointer to a
@@ -44,7 +43,4 @@ func RecommendedDefaultNodeLifecycleControllerConfiguration(obj *kubectrlmgrconf
if obj.NodeStartupGracePeriod == zero {
obj.NodeStartupGracePeriod = metav1.Duration{Duration: 60 * time.Second}
}
if obj.EnableTaintManager == nil {
obj.EnableTaintManager = pointer.Bool(true)
}
}

View File

@@ -82,28 +82,21 @@ func Convert_v1_GroupResource_To_v1alpha1_GroupResource(in *v1.GroupResource, ou
}
func autoConvert_v1alpha1_NodeLifecycleControllerConfiguration_To_config_NodeLifecycleControllerConfiguration(in *v1alpha1.NodeLifecycleControllerConfiguration, out *config.NodeLifecycleControllerConfiguration, s conversion.Scope) error {
if err := v1.Convert_Pointer_bool_To_bool(&in.EnableTaintManager, &out.EnableTaintManager, s); err != nil {
return err
}
out.NodeEvictionRate = in.NodeEvictionRate
out.SecondaryNodeEvictionRate = in.SecondaryNodeEvictionRate
out.NodeStartupGracePeriod = in.NodeStartupGracePeriod
out.NodeMonitorGracePeriod = in.NodeMonitorGracePeriod
out.PodEvictionTimeout = in.PodEvictionTimeout
// WARNING: in.PodEvictionTimeout requires manual conversion: does not exist in peer-type
out.LargeClusterSizeThreshold = in.LargeClusterSizeThreshold
out.UnhealthyZoneThreshold = in.UnhealthyZoneThreshold
return nil
}
func autoConvert_config_NodeLifecycleControllerConfiguration_To_v1alpha1_NodeLifecycleControllerConfiguration(in *config.NodeLifecycleControllerConfiguration, out *v1alpha1.NodeLifecycleControllerConfiguration, s conversion.Scope) error {
if err := v1.Convert_bool_To_Pointer_bool(&in.EnableTaintManager, &out.EnableTaintManager, s); err != nil {
return err
}
out.NodeEvictionRate = in.NodeEvictionRate
out.SecondaryNodeEvictionRate = in.SecondaryNodeEvictionRate
out.NodeStartupGracePeriod = in.NodeStartupGracePeriod
out.NodeMonitorGracePeriod = in.NodeMonitorGracePeriod
out.PodEvictionTimeout = in.PodEvictionTimeout
out.LargeClusterSizeThreshold = in.LargeClusterSizeThreshold
out.UnhealthyZoneThreshold = in.UnhealthyZoneThreshold
return nil
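The generated WARNING above means conversion-gen no longer emits an automatic conversion for PodEvictionTimeout, which still exists in the external v1alpha1 type but not in the internal config. The usual way to resolve such a warning is a hand-written wrapper next to the generated code that calls the autoConvert function and knowingly drops the v1alpha1-only field; a sketch under that assumption (the actual resolution is not shown in this diff):

// Hand-written wrapper that conversion-gen expects once a field cannot be
// converted automatically. It reuses the generated autoConvert function and
// deliberately ignores in.PodEvictionTimeout, which has no internal counterpart.
func Convert_v1alpha1_NodeLifecycleControllerConfiguration_To_config_NodeLifecycleControllerConfiguration(in *v1alpha1.NodeLifecycleControllerConfiguration, out *config.NodeLifecycleControllerConfiguration, s conversion.Scope) error {
	return autoConvert_v1alpha1_NodeLifecycleControllerConfiguration_To_config_NodeLifecycleControllerConfiguration(in, out, s)
}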

View File

@@ -26,7 +26,6 @@ func (in *NodeLifecycleControllerConfiguration) DeepCopyInto(out *NodeLifecycleC
*out = *in
out.NodeStartupGracePeriod = in.NodeStartupGracePeriod
out.NodeMonitorGracePeriod = in.NodeMonitorGracePeriod
out.PodEvictionTimeout = in.PodEvictionTimeout
return
}

View File

@@ -205,57 +205,6 @@ type podUpdateItem struct {
name string
}
type evictionStatus int
const (
unmarked = iota
toBeEvicted
evicted
)
// nodeEvictionMap stores evictionStatus data for each node.
type nodeEvictionMap struct {
lock sync.Mutex
nodeEvictions map[string]evictionStatus
}
func newNodeEvictionMap() *nodeEvictionMap {
return &nodeEvictionMap{
nodeEvictions: make(map[string]evictionStatus),
}
}
func (n *nodeEvictionMap) registerNode(nodeName string) {
n.lock.Lock()
defer n.lock.Unlock()
n.nodeEvictions[nodeName] = unmarked
}
func (n *nodeEvictionMap) unregisterNode(nodeName string) {
n.lock.Lock()
defer n.lock.Unlock()
delete(n.nodeEvictions, nodeName)
}
func (n *nodeEvictionMap) setStatus(nodeName string, status evictionStatus) bool {
n.lock.Lock()
defer n.lock.Unlock()
if _, exists := n.nodeEvictions[nodeName]; !exists {
return false
}
n.nodeEvictions[nodeName] = status
return true
}
func (n *nodeEvictionMap) getStatus(nodeName string) (evictionStatus, bool) {
n.lock.Lock()
defer n.lock.Unlock()
if _, exists := n.nodeEvictions[nodeName]; !exists {
return unmarked, false
}
return n.nodeEvictions[nodeName], true
}
// Controller is the controller that manages node's life cycle.
type Controller struct {
taintManager *scheduler.NoExecuteTaintManager
@@ -278,9 +227,6 @@ type Controller struct {
// evictorLock protects zonePodEvictor and zoneNoExecuteTainter.
evictorLock sync.Mutex
nodeEvictionMap *nodeEvictionMap
// workers that evicts pods from unresponsive nodes.
zonePodEvictor map[string]*scheduler.RateLimitedTimedQueue
// workers that are responsible for tainting nodes.
zoneNoExecuteTainter map[string]*scheduler.RateLimitedTimedQueue
@@ -327,7 +273,7 @@ type Controller struct {
// post node status/lease. It is pointless to make nodeMonitorGracePeriod
// be less than the node health signal update frequency, since there will
// only be fresh values from Kubelet at an interval of node health signal
// update frequency. The constant must be less than podEvictionTimeout.
// update frequency.
// 2. nodeMonitorGracePeriod can't be too large for user experience - larger
// value takes longer for user to see up-to-date node health.
nodeMonitorGracePeriod time.Duration
@@ -336,16 +282,11 @@ type Controller struct {
// Defaults to scheduler.UpdateWorkerSize.
nodeUpdateWorkerSize int
podEvictionTimeout time.Duration
evictionLimiterQPS float32
secondaryEvictionLimiterQPS float32
largeClusterThreshold int32
unhealthyZoneThreshold float32
// if set to true Controller will start TaintManager that will evict Pods from
// tainted nodes, if they're not tolerated.
runTaintManager bool
nodeUpdateQueue workqueue.Interface
podUpdateQueue workqueue.RateLimitingInterface
}
@@ -361,12 +302,10 @@ func NewNodeLifecycleController(
nodeMonitorPeriod time.Duration,
nodeStartupGracePeriod time.Duration,
nodeMonitorGracePeriod time.Duration,
podEvictionTimeout time.Duration,
evictionLimiterQPS float32,
secondaryEvictionLimiterQPS float32,
largeClusterThreshold int32,
unhealthyZoneThreshold float32,
runTaintManager bool,
) (*Controller, error) {
logger := klog.LoggerWithName(klog.FromContext(ctx), "NodeLifecycleController")
if kubeClient == nil {
@@ -382,23 +321,19 @@ func NewNodeLifecycleController(
now: metav1.Now,
knownNodeSet: make(map[string]*v1.Node),
nodeHealthMap: newNodeHealthMap(),
nodeEvictionMap: newNodeEvictionMap(),
broadcaster: eventBroadcaster,
recorder: recorder,
nodeMonitorPeriod: nodeMonitorPeriod,
nodeStartupGracePeriod: nodeStartupGracePeriod,
nodeMonitorGracePeriod: nodeMonitorGracePeriod,
nodeUpdateWorkerSize: scheduler.UpdateWorkerSize,
zonePodEvictor: make(map[string]*scheduler.RateLimitedTimedQueue),
zoneNoExecuteTainter: make(map[string]*scheduler.RateLimitedTimedQueue),
nodesToRetry: sync.Map{},
zoneStates: make(map[string]ZoneState),
podEvictionTimeout: podEvictionTimeout,
evictionLimiterQPS: evictionLimiterQPS,
secondaryEvictionLimiterQPS: secondaryEvictionLimiterQPS,
largeClusterThreshold: largeClusterThreshold,
unhealthyZoneThreshold: unhealthyZoneThreshold,
runTaintManager: runTaintManager,
nodeUpdateQueue: workqueue.NewNamed("node_lifecycle_controller"),
podUpdateQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "node_lifecycle_controller_pods"),
}
@@ -477,7 +412,6 @@ func NewNodeLifecycleController(
nc.podLister = podInformer.Lister()
nc.nodeLister = nodeInformer.Lister()
if nc.runTaintManager {
nc.taintManager = scheduler.NewNoExecuteTaintManager(ctx, kubeClient, nc.podLister, nc.nodeLister, nc.getPodsAssignedToNode)
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
@@ -493,13 +427,11 @@ func NewNodeLifecycleController(
return nil
}),
})
}
logger.Info("Controller will reconcile labels")
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
nc.nodeUpdateQueue.Add(node.Name)
nc.nodeEvictionMap.registerNode(node.Name)
return nil
}),
UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
@@ -508,7 +440,6 @@ func NewNodeLifecycleController(
}),
DeleteFunc: controllerutil.CreateDeleteNodeHandler(func(node *v1.Node) error {
nc.nodesToRetry.Delete(node.Name)
nc.nodeEvictionMap.unregisterNode(node.Name)
return nil
}),
})
@@ -549,9 +480,7 @@ func (nc *Controller) Run(ctx context.Context) {
return
}
if nc.runTaintManager {
go nc.taintManager.Run(ctx)
}
// Start workers to reconcile labels and/or update NoSchedule taint for nodes.
for i := 0; i < scheduler.UpdateWorkerSize; i++ {
@@ -566,16 +495,9 @@ func (nc *Controller) Run(ctx context.Context) {
go wait.UntilWithContext(ctx, nc.doPodProcessingWorker, time.Second)
}
if nc.runTaintManager {
// Handling taint based evictions. Because we don't want a dedicated logic in TaintManager for NC-originated
// taints and we normally don't rate limit evictions caused by taints, we need to rate limit adding taints.
go wait.UntilWithContext(ctx, nc.doNoExecuteTaintingPass, scheduler.NodeEvictionPeriod)
} else {
// Managing eviction of nodes:
// When we delete pods off a node, if the node was not empty at the time we then
// queue an eviction watcher. If we hit an error, retry deletion.
go wait.UntilWithContext(ctx, nc.doEvictionPass, scheduler.NodeEvictionPeriod)
}
// Incorporate the results of node health signal pushed from kubelet to master.
go wait.UntilWithContext(ctx, func(ctx context.Context) {
@@ -732,73 +654,6 @@ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
}
}
func (nc *Controller) doEvictionPass(ctx context.Context) {
// Extract out the keys of the map in order to not hold
// the evictorLock for the entire function and hold it
// only when nescessary.
var zonePodEvictorKeys []string
func() {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
zonePodEvictorKeys = make([]string, 0, len(nc.zonePodEvictor))
for k := range nc.zonePodEvictor {
zonePodEvictorKeys = append(zonePodEvictorKeys, k)
}
}()
logger := klog.FromContext(ctx)
for _, k := range zonePodEvictorKeys {
var zonePodEvictionWorker *scheduler.RateLimitedTimedQueue
func() {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
// Extracting the value without checking if the key
// exists or not is safe to do here since zones do
// not get removed, and consequently pod evictors for
// these zones also do not get removed, only added.
zonePodEvictionWorker = nc.zonePodEvictor[k]
}()
// Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded).
zonePodEvictionWorker.Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
node, err := nc.nodeLister.Get(value.Value)
if apierrors.IsNotFound(err) {
logger.Info("Node no longer present in nodeLister", "node", klog.KRef("", value.Value))
} else if err != nil {
logger.Info("Failed to get Node from the nodeLister", "node", klog.KRef("", value.Value), "err", err)
}
nodeUID, _ := value.UID.(string)
pods, err := nc.getPodsAssignedToNode(value.Value)
if err != nil {
utilruntime.HandleError(fmt.Errorf("unable to list pods from node %q: %v", value.Value, err))
return false, 0
}
remaining, err := controllerutil.DeletePods(ctx, nc.kubeClient, pods, nc.recorder, value.Value, nodeUID, nc.daemonSetStore)
if err != nil {
// We are not setting eviction status here.
// New pods will be handled by zonePodEvictor retry
// instead of immediate pod eviction.
utilruntime.HandleError(fmt.Errorf("unable to evict node %q: %v", value.Value, err))
return false, 0
}
if !nc.nodeEvictionMap.setStatus(value.Value, evicted) {
logger.V(2).Info("Node was unregistered in the meantime - skipping setting status", "node", klog.KRef("", value.Value))
}
if remaining {
logger.Info("Pods awaiting deletion due to Controller eviction")
}
if node != nil {
zone := nodetopology.GetZoneKey(node)
evictionsNumber.WithLabelValues(zone).Inc()
evictionsTotal.WithLabelValues(zone).Inc()
}
return true, 0
})
}
}
// monitorNodeHealth verifies node health are constantly updated by kubelet, and
// if not, post "NodeReady==ConditionUnknown".
// This function will taint nodes who are not ready or not reachable for a long period of time.
@@ -824,11 +679,7 @@ func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
controllerutil.RecordNodeEvent(nc.recorder, added[i].Name, string(added[i].UID), v1.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in Controller", added[i].Name))
nc.knownNodeSet[added[i].Name] = added[i]
nc.addPodEvictorForNewZone(logger, added[i])
if nc.runTaintManager {
nc.markNodeAsReachable(ctx, added[i])
} else {
nc.cancelPodEviction(logger, added[i])
}
}
for i := range deleted {
@@ -845,14 +696,13 @@ func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
updateNodeHealthDuration.Observe(time.Since(start.Time).Seconds())
}()
var gracePeriod time.Duration
var observedReadyCondition v1.NodeCondition
var currentReadyCondition *v1.NodeCondition
node := nodes[piece].DeepCopy()
if err := wait.PollImmediate(retrySleepTime, retrySleepTime*scheduler.NodeHealthUpdateRetry, func() (bool, error) {
var err error
gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeHealth(ctx, node)
_, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeHealth(ctx, node)
if err == nil {
return true, nil
}
@@ -887,13 +737,7 @@ func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
}
return
}
if nc.runTaintManager {
nc.processTaintBaseEviction(ctx, node, &observedReadyCondition)
} else {
if err := nc.processNoTaintBaseEviction(ctx, node, &observedReadyCondition, gracePeriod, pods); err != nil {
utilruntime.HandleError(fmt.Errorf("unable to evict all pods from node %v: %v; queuing for retry", node.Name, err))
}
}
_, needsRetry := nc.nodesToRetry.Load(node.Name)
switch {
@@ -960,53 +804,6 @@ func (nc *Controller) processTaintBaseEviction(ctx context.Context, node *v1.Nod
}
}
func (nc *Controller) processNoTaintBaseEviction(ctx context.Context, node *v1.Node, observedReadyCondition *v1.NodeCondition, gracePeriod time.Duration, pods []*v1.Pod) error {
decisionTimestamp := nc.now()
nodeHealthData := nc.nodeHealthMap.getDeepCopy(node.Name)
if nodeHealthData == nil {
return fmt.Errorf("health data doesn't exist for node %q", node.Name)
}
// Check eviction timeout against decisionTimestamp
logger := klog.FromContext(ctx)
switch observedReadyCondition.Status {
case v1.ConditionFalse:
if decisionTimestamp.After(nodeHealthData.readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
enqueued, err := nc.evictPods(ctx, node, pods)
if err != nil {
return err
}
if enqueued {
logger.V(2).Info("Node is NotReady. Adding Pods on Node to eviction queue: decisionTimestamp is later than readyTransitionTimestamp + podEvictionTimeout",
"node", klog.KObj(node),
"decisionTimestamp", decisionTimestamp,
"readyTransitionTimestamp", nodeHealthData.readyTransitionTimestamp,
"podEvictionTimeout", nc.podEvictionTimeout,
)
}
}
case v1.ConditionUnknown:
if decisionTimestamp.After(nodeHealthData.probeTimestamp.Add(nc.podEvictionTimeout)) {
enqueued, err := nc.evictPods(ctx, node, pods)
if err != nil {
return err
}
if enqueued {
logger.V(2).Info("Node is unresponsive. Adding Pods on Node to eviction queues: decisionTimestamp is later than readyTransitionTimestamp + podEvictionTimeout-gracePeriod",
"node", klog.KObj(node),
"decisionTimestamp", decisionTimestamp,
"readyTransitionTimestamp", nodeHealthData.readyTransitionTimestamp,
"podEvictionTimeoutGracePeriod", nc.podEvictionTimeout-gracePeriod,
)
}
}
case v1.ConditionTrue:
if nc.cancelPodEviction(logger, node) {
logger.V(2).Info("Node is ready again, cancelled pod eviction", "node", klog.KObj(node))
}
}
return nil
}
// labelNodeDisruptionExclusion is a label on nodes that controls whether they are
// excluded from being considered for disruption checks by the node controller.
const labelNodeDisruptionExclusion = "node.kubernetes.io/exclude-disruption"
@@ -1230,22 +1027,14 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
if allAreFullyDisrupted {
logger.Info("Controller detected that all Nodes are not-Ready. Entering master disruption mode")
for i := range nodes {
if nc.runTaintManager {
_, err := nc.markNodeAsReachable(ctx, nodes[i])
if err != nil {
logger.Error(nil, "Failed to remove taints from Node", "node", klog.KObj(nodes[i]))
}
} else {
nc.cancelPodEviction(logger, nodes[i])
}
}
// We stop all evictions.
for k := range nc.zoneStates {
if nc.runTaintManager {
nc.zoneNoExecuteTainter[k].SwapLimiter(0)
} else {
nc.zonePodEvictor[k].SwapLimiter(0)
}
}
for k := range nc.zoneStates {
nc.zoneStates[k] = stateFullDisruption
@@ -1312,7 +1101,7 @@ func (nc *Controller) doPodProcessingWorker(ctx context.Context) {
// processPod is processing events of assigning pods to nodes. In particular:
// 1. for NodeReady=true node, taint eviction for this pod will be cancelled
// 2. for NodeReady=false or unknown node, taint eviction of pod will happen and pod will be marked as not ready
// 3. if node doesn't exist in cache, it will be skipped and handled later by doEvictionPass
// 3. if node doesn't exist in cache, it will be skipped.
func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
defer nc.podUpdateQueue.Done(podItem)
pod, err := nc.podLister.Pods(podItem.namespace).Get(podItem.name)
@@ -1331,12 +1120,11 @@ func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
nodeHealth := nc.nodeHealthMap.getDeepCopy(nodeName)
if nodeHealth == nil {
// Node data is not gathered yet or node has beed removed in the meantime.
// Node data is not gathered yet or node has been removed in the meantime.
// Pod will be handled by doEvictionPass method.
return
}
node, err := nc.nodeLister.Get(nodeName)
_, err = nc.nodeLister.Get(nodeName)
if err != nil {
logger.Info("Failed to read node", "node", klog.KRef("", nodeName), "err", err)
nc.podUpdateQueue.AddRateLimited(podItem)
@@ -1352,16 +1140,6 @@ func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
}
pods := []*v1.Pod{pod}
// In taint-based eviction mode, only node updates are processed by NodeLifecycleController.
// Pods are processed by TaintManager.
if !nc.runTaintManager {
if err := nc.processNoTaintBaseEviction(ctx, node, currentReadyCondition, nc.nodeMonitorGracePeriod, pods); err != nil {
logger.Info("Unable to process pod eviction from node", "pod", klog.KRef(podItem.namespace, podItem.name), "node", klog.KRef("", nodeName), "err", err)
nc.podUpdateQueue.AddRateLimited(podItem)
return
}
}
if currentReadyCondition.Status != v1.ConditionTrue {
if err := controllerutil.MarkPodsNotReady(ctx, nc.kubeClient, nc.recorder, pods, nodeName); err != nil {
logger.Info("Unable to mark pod NotReady on node", "pod", klog.KRef(podItem.namespace, podItem.name), "node", klog.KRef("", nodeName), "err", err)
@@ -1373,27 +1151,13 @@ func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
func (nc *Controller) setLimiterInZone(zone string, zoneSize int, state ZoneState) {
switch state {
case stateNormal:
if nc.runTaintManager {
nc.zoneNoExecuteTainter[zone].SwapLimiter(nc.evictionLimiterQPS)
} else {
nc.zonePodEvictor[zone].SwapLimiter(nc.evictionLimiterQPS)
}
case statePartialDisruption:
if nc.runTaintManager {
nc.zoneNoExecuteTainter[zone].SwapLimiter(
nc.enterPartialDisruptionFunc(zoneSize))
} else {
nc.zonePodEvictor[zone].SwapLimiter(
nc.enterPartialDisruptionFunc(zoneSize))
}
case stateFullDisruption:
if nc.runTaintManager {
nc.zoneNoExecuteTainter[zone].SwapLimiter(
nc.enterFullDisruptionFunc(zoneSize))
} else {
nc.zonePodEvictor[zone].SwapLimiter(
nc.enterFullDisruptionFunc(zoneSize))
}
}
}
@@ -1453,15 +1217,9 @@ func (nc *Controller) addPodEvictorForNewZone(logger klog.Logger, node *v1.Node)
zone := nodetopology.GetZoneKey(node)
if _, found := nc.zoneStates[zone]; !found {
nc.zoneStates[zone] = stateInitial
if !nc.runTaintManager {
nc.zonePodEvictor[zone] =
scheduler.NewRateLimitedTimedQueue(
flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, scheduler.EvictionRateLimiterBurst))
} else {
nc.zoneNoExecuteTainter[zone] =
scheduler.NewRateLimitedTimedQueue(
flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, scheduler.EvictionRateLimiterBurst))
}
// Init the metric for the new zone.
logger.Info("Initializing eviction metric for zone", "zone", zone)
evictionsNumber.WithLabelValues(zone).Add(0)
@@ -1469,50 +1227,6 @@ func (nc *Controller) addPodEvictorForNewZone(logger klog.Logger, node *v1.Node)
}
}
// cancelPodEviction removes any queued evictions, typically because the node is available again. It
// returns true if an eviction was queued.
func (nc *Controller) cancelPodEviction(logger klog.Logger, node *v1.Node) bool {
zone := nodetopology.GetZoneKey(node)
if !nc.nodeEvictionMap.setStatus(node.Name, unmarked) {
logger.V(2).Info("Node was unregistered in the meantime - skipping setting status", "node", klog.KObj(node))
}
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
wasDeleting := nc.zonePodEvictor[zone].Remove(node.Name)
if wasDeleting {
logger.V(2).Info("Cancelling pod Eviction on Node", "node", klog.KObj(node))
return true
}
return false
}
// evictPods:
// - adds node to evictor queue if the node is not marked as evicted.
// Returns false if the node name was already enqueued.
// - deletes pods immediately if node is already marked as evicted.
// Returns false, because the node wasn't added to the queue.
func (nc *Controller) evictPods(ctx context.Context, node *v1.Node, pods []*v1.Pod) (bool, error) {
status, ok := nc.nodeEvictionMap.getStatus(node.Name)
if ok && status == evicted {
// Node eviction already happened for this node.
// Handling immediate pod deletion.
_, err := controllerutil.DeletePods(ctx, nc.kubeClient, pods, nc.recorder, node.Name, string(node.UID), nc.daemonSetStore)
if err != nil {
return false, fmt.Errorf("unable to delete pods from node %q: %v", node.Name, err)
}
return false, nil
}
logger := klog.FromContext(ctx)
if !nc.nodeEvictionMap.setStatus(node.Name, toBeEvicted) {
logger.V(2).Info("Node was unregistered in the meantime - skipping setting status", "node", klog.KObj(node))
}
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
return nc.zonePodEvictor[nodetopology.GetZoneKey(node)].Add(node.Name, string(node.UID)), nil
}
func (nc *Controller) markNodeForTainting(node *v1.Node, status v1.ConditionStatus) bool {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()

View File

@@ -41,7 +41,6 @@ import (
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/fake"
testcore "k8s.io/client-go/testing"
"k8s.io/klog/v2"
"k8s.io/klog/v2/ktesting"
kubeletapis "k8s.io/kubelet/pkg/apis"
"k8s.io/kubernetes/pkg/controller"
@@ -89,29 +88,6 @@ type nodeLifecycleController struct {
daemonSetInformer appsinformers.DaemonSetInformer
}
// doEviction does the fake eviction and returns the status of eviction operation.
func (nc *nodeLifecycleController) doEviction(logger klog.Logger, fakeNodeHandler *testutil.FakeNodeHandler) bool {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
zones := testutil.GetZones(fakeNodeHandler)
for _, zone := range zones {
nc.zonePodEvictor[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
uid, _ := value.UID.(string)
pods, _ := nc.getPodsAssignedToNode(value.Value)
controllerutil.DeletePods(context.TODO(), fakeNodeHandler, pods, nc.recorder, value.Value, uid, nc.daemonSetStore)
_ = nc.nodeEvictionMap.setStatus(value.Value, evicted)
return true, 0
})
}
for _, action := range fakeNodeHandler.Actions() {
if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
return true
}
}
return false
}
func createNodeLease(nodeName string, renewTime metav1.MicroTime) *coordv1.Lease {
return &coordv1.Lease{
ObjectMeta: metav1.ObjectMeta{
@@ -149,7 +125,6 @@ func (nc *nodeLifecycleController) syncNodeStore(fakeNodeHandler *testutil.FakeN
func newNodeLifecycleControllerFromClient(
ctx context.Context,
kubeClient clientset.Interface,
podEvictionTimeout time.Duration,
evictionLimiterQPS float32,
secondaryEvictionLimiterQPS float32,
largeClusterThreshold int32,
@@ -157,7 +132,6 @@ func newNodeLifecycleControllerFromClient(
nodeMonitorGracePeriod time.Duration,
nodeStartupGracePeriod time.Duration,
nodeMonitorPeriod time.Duration,
useTaints bool,
) (*nodeLifecycleController, error) {
factory := informers.NewSharedInformerFactory(kubeClient, controller.NoResyncPeriodFunc())
@@ -176,12 +150,10 @@ func newNodeLifecycleControllerFromClient(
nodeMonitorPeriod,
nodeStartupGracePeriod,
nodeMonitorGracePeriod,
podEvictionTimeout,
evictionLimiterQPS,
secondaryEvictionLimiterQPS,
largeClusterThreshold,
unhealthyZoneThreshold,
useTaints,
)
if err != nil {
return nil, err
@@ -197,7 +169,6 @@ func newNodeLifecycleControllerFromClient(
func TestMonitorNodeHealthEvictPods(t *testing.T) {
fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
evictionTimeout := 10 * time.Minute
labels := map[string]string{
v1.LabelTopologyRegion: "region1",
v1.LabelTopologyZone: "zone1",
@@ -365,7 +336,6 @@ func TestMonitorNodeHealthEvictPods(t *testing.T) {
Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}),
},
daemonSets: nil,
timeToPass: evictionTimeout,
newNodeStatus: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
@@ -378,7 +348,7 @@ func TestMonitorNodeHealthEvictPods(t *testing.T) {
},
},
secondNodeNewStatus: healthyNodeNewStatus,
expectedEvictPods: false,
expectedEvictPods: true,
description: "Node created long time ago, and kubelet posted NotReady for a short period of time.",
},
// Pod is ds-managed, and kubelet posted NotReady for a long period of time.
@@ -596,7 +566,7 @@ func TestMonitorNodeHealthEvictPods(t *testing.T) {
Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}),
},
daemonSets: nil,
timeToPass: evictionTimeout - testNodeMonitorGracePeriod,
timeToPass: testNodeMonitorGracePeriod,
newNodeStatus: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
@@ -609,7 +579,7 @@ func TestMonitorNodeHealthEvictPods(t *testing.T) {
},
},
secondNodeNewStatus: healthyNodeNewStatus,
expectedEvictPods: false,
expectedEvictPods: true,
description: "Node created long time ago, node controller posted Unknown for a short period of time.",
},
// Node created long time ago, node controller posted Unknown for a long period of time.
@@ -686,7 +656,6 @@ func TestMonitorNodeHealthEvictPods(t *testing.T) {
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
item.fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS,
testRateLimiterQPS,
testLargeClusterThreshold,
@@ -694,7 +663,7 @@ func TestMonitorNodeHealthEvictPods(t *testing.T) {
testNodeMonitorGracePeriod,
testNodeStartupGracePeriod,
testNodeMonitorPeriod,
false)
)
nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder()
nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(item.fakeNodeHandler.Clientset)
@@ -725,8 +694,8 @@ func TestMonitorNodeHealthEvictPods(t *testing.T) {
zones := testutil.GetZones(item.fakeNodeHandler)
logger, _ := ktesting.NewTestContext(t)
for _, zone := range zones {
if _, ok := nodeController.zonePodEvictor[zone]; ok {
if _, ok := nodeController.zoneNoExecuteTainter[zone]; ok {
nodeController.zonePodEvictor[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
nodeController.zoneNoExecuteTainter[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
nodeUID, _ := value.UID.(string)
pods, err := nodeController.getPodsAssignedToNode(value.Value)
if err != nil {
@@ -757,7 +726,6 @@ func TestMonitorNodeHealthEvictPods(t *testing.T) {
func TestPodStatusChange(t *testing.T) {
fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
evictionTimeout := 10 * time.Minute
// Because of the logic that prevents NC from evicting anything when all Nodes are NotReady
// we need second healthy node in tests. Because of how the tests are written we need to update
@@ -856,7 +824,6 @@ func TestPodStatusChange(t *testing.T) {
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
item.fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS,
testRateLimiterQPS,
testLargeClusterThreshold,
@@ -864,7 +831,7 @@ func TestPodStatusChange(t *testing.T) {
testNodeMonitorGracePeriod,
testNodeStartupGracePeriod,
testNodeMonitorPeriod,
false)
)
nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder()
nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(item.fakeNodeHandler.Clientset)
@@ -888,7 +855,7 @@ func TestPodStatusChange(t *testing.T) {
zones := testutil.GetZones(item.fakeNodeHandler)
logger, _ := ktesting.NewTestContext(t)
for _, zone := range zones {
nodeController.zonePodEvictor[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
nodeController.zoneNoExecuteTainter[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
nodeUID, _ := value.UID.(string)
pods, err := nodeController.getPodsAssignedToNode(value.Value)
if err != nil {
@@ -916,574 +883,6 @@ func TestPodStatusChange(t *testing.T) {
}
}
func TestMonitorNodeHealthEvictPodsWithDisruption(t *testing.T) {
fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
evictionTimeout := 10 * time.Minute
timeToPass := 60 * time.Minute
// Because of the logic that prevents NC from evicting anything when all Nodes are NotReady
// we need second healthy node in tests. Because of how the tests are written we need to update
// the status of this Node.
healthyNodeNewStatus := v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 13, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
}
unhealthyNodeNewStatus := v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
// Node status was updated by nodecontroller 1hr ago
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
}
table := []struct {
nodeList []*v1.Node
podList []v1.Pod
updatedNodeStatuses []v1.NodeStatus
expectedInitialStates map[string]ZoneState
expectedFollowingStates map[string]ZoneState
expectedEvictPods bool
description string
}{
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
// Only zone is down - eviction shouldn't take place
{
nodeList: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
v1.LabelTopologyRegion: "region1",
v1.LabelTopologyZone: "zone1",
v1.LabelFailureDomainBetaRegion: "region1",
v1.LabelFailureDomainBetaZone: "zone1",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
v1.LabelTopologyRegion: "region1",
v1.LabelTopologyZone: "zone1",
v1.LabelFailureDomainBetaRegion: "region1",
v1.LabelFailureDomainBetaZone: "zone1",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
podList: []v1.Pod{*testutil.NewPod("pod0", "node0")},
updatedNodeStatuses: []v1.NodeStatus{
unhealthyNodeNewStatus,
unhealthyNodeNewStatus,
},
expectedInitialStates: map[string]ZoneState{testutil.CreateZoneID("region1", "zone1"): stateFullDisruption},
expectedFollowingStates: map[string]ZoneState{testutil.CreateZoneID("region1", "zone1"): stateFullDisruption},
expectedEvictPods: false,
description: "Network Disruption: Only zone is down - eviction shouldn't take place.",
},
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
// Both zones down - eviction shouldn't take place
{
nodeList: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
v1.LabelTopologyRegion: "region1",
v1.LabelTopologyZone: "zone1",
v1.LabelFailureDomainBetaRegion: "region1",
v1.LabelFailureDomainBetaZone: "zone1",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
v1.LabelTopologyRegion: "region2",
v1.LabelTopologyZone: "zone2",
v1.LabelFailureDomainBetaRegion: "region2",
v1.LabelFailureDomainBetaZone: "zone2",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
podList: []v1.Pod{*testutil.NewPod("pod0", "node0")},
updatedNodeStatuses: []v1.NodeStatus{
unhealthyNodeNewStatus,
unhealthyNodeNewStatus,
},
expectedInitialStates: map[string]ZoneState{
testutil.CreateZoneID("region1", "zone1"): stateFullDisruption,
testutil.CreateZoneID("region2", "zone2"): stateFullDisruption,
},
expectedFollowingStates: map[string]ZoneState{
testutil.CreateZoneID("region1", "zone1"): stateFullDisruption,
testutil.CreateZoneID("region2", "zone2"): stateFullDisruption,
},
expectedEvictPods: false,
description: "Network Disruption: Both zones down - eviction shouldn't take place.",
},
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
// One zone is down - eviction should take place
{
nodeList: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
v1.LabelTopologyRegion: "region1",
v1.LabelTopologyZone: "zone1",
v1.LabelFailureDomainBetaRegion: "region1",
v1.LabelFailureDomainBetaZone: "zone1",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
v1.LabelTopologyRegion: "region1",
v1.LabelTopologyZone: "zone2",
v1.LabelFailureDomainBetaRegion: "region1",
v1.LabelFailureDomainBetaZone: "zone2",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
podList: []v1.Pod{*testutil.NewPod("pod0", "node0")},
updatedNodeStatuses: []v1.NodeStatus{
unhealthyNodeNewStatus,
healthyNodeNewStatus,
},
expectedInitialStates: map[string]ZoneState{
testutil.CreateZoneID("region1", "zone1"): stateFullDisruption,
testutil.CreateZoneID("region1", "zone2"): stateNormal,
},
expectedFollowingStates: map[string]ZoneState{
testutil.CreateZoneID("region1", "zone1"): stateFullDisruption,
testutil.CreateZoneID("region1", "zone2"): stateNormal,
},
expectedEvictPods: true,
description: "Network Disruption: One zone is down - eviction should take place.",
},
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period
// of on first Node, eviction should stop even though Node with label
// node.kubernetes.io/exclude-disruption is healthy.
{
nodeList: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
v1.LabelTopologyRegion: "region1",
v1.LabelTopologyZone: "zone1",
v1.LabelFailureDomainBetaRegion: "region1",
v1.LabelFailureDomainBetaZone: "zone1",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-master",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
v1.LabelTopologyRegion: "region1",
v1.LabelTopologyZone: "zone1",
v1.LabelFailureDomainBetaRegion: "region1",
v1.LabelFailureDomainBetaZone: "zone1",
labelNodeDisruptionExclusion: "",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
podList: []v1.Pod{*testutil.NewPod("pod0", "node0")},
updatedNodeStatuses: []v1.NodeStatus{
unhealthyNodeNewStatus,
healthyNodeNewStatus,
},
expectedInitialStates: map[string]ZoneState{
testutil.CreateZoneID("region1", "zone1"): stateFullDisruption,
},
expectedFollowingStates: map[string]ZoneState{
testutil.CreateZoneID("region1", "zone1"): stateFullDisruption,
},
expectedEvictPods: false,
description: "NetworkDisruption: eviction should stop, only Node with label node.kubernetes.io/exclude-disruption is healthy",
},
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
// Initially both zones down, one comes back - eviction should take place
{
nodeList: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
v1.LabelTopologyRegion: "region1",
v1.LabelTopologyZone: "zone1",
v1.LabelFailureDomainBetaRegion: "region1",
v1.LabelFailureDomainBetaZone: "zone1",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
v1.LabelTopologyRegion: "region1",
v1.LabelTopologyZone: "zone2",
v1.LabelFailureDomainBetaRegion: "region1",
v1.LabelFailureDomainBetaZone: "zone2",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
podList: []v1.Pod{*testutil.NewPod("pod0", "node0")},
updatedNodeStatuses: []v1.NodeStatus{
unhealthyNodeNewStatus,
healthyNodeNewStatus,
},
expectedInitialStates: map[string]ZoneState{
testutil.CreateZoneID("region1", "zone1"): stateFullDisruption,
testutil.CreateZoneID("region1", "zone2"): stateFullDisruption,
},
expectedFollowingStates: map[string]ZoneState{
testutil.CreateZoneID("region1", "zone1"): stateFullDisruption,
testutil.CreateZoneID("region1", "zone2"): stateNormal,
},
expectedEvictPods: true,
description: "Initially both zones down, one comes back - eviction should take place",
},
// NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
// Zone is partially disrupted - eviction should take place
{
nodeList: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
v1.LabelTopologyRegion: "region1",
v1.LabelTopologyZone: "zone1",
v1.LabelFailureDomainBetaRegion: "region1",
v1.LabelFailureDomainBetaZone: "zone1",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
v1.LabelTopologyRegion: "region1",
v1.LabelTopologyZone: "zone1",
v1.LabelFailureDomainBetaRegion: "region1",
v1.LabelFailureDomainBetaZone: "zone1",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "node2",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
v1.LabelTopologyRegion: "region1",
v1.LabelTopologyZone: "zone1",
v1.LabelFailureDomainBetaRegion: "region1",
v1.LabelFailureDomainBetaZone: "zone1",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "node3",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
v1.LabelTopologyRegion: "region1",
v1.LabelTopologyZone: "zone1",
v1.LabelFailureDomainBetaRegion: "region1",
v1.LabelFailureDomainBetaZone: "zone1",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "node4",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
v1.LabelTopologyRegion: "region1",
v1.LabelTopologyZone: "zone1",
v1.LabelFailureDomainBetaRegion: "region1",
v1.LabelFailureDomainBetaZone: "zone1",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
podList: []v1.Pod{*testutil.NewPod("pod0", "node0")},
updatedNodeStatuses: []v1.NodeStatus{
unhealthyNodeNewStatus,
unhealthyNodeNewStatus,
unhealthyNodeNewStatus,
healthyNodeNewStatus,
healthyNodeNewStatus,
},
expectedInitialStates: map[string]ZoneState{
testutil.CreateZoneID("region1", "zone1"): statePartialDisruption,
},
expectedFollowingStates: map[string]ZoneState{
testutil.CreateZoneID("region1", "zone1"): statePartialDisruption,
},
expectedEvictPods: true,
description: "Zone is partially disrupted - eviction should take place.",
},
}
for _, item := range table {
fakeNodeHandler := &testutil.FakeNodeHandler{
Existing: item.nodeList,
Clientset: fake.NewSimpleClientset(&v1.PodList{Items: item.podList}),
}
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS,
testRateLimiterQPS,
testLargeClusterThreshold,
testUnhealthyThreshold,
testNodeMonitorGracePeriod,
testNodeStartupGracePeriod,
testNodeMonitorPeriod,
false)
nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder()
nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(fakeNodeHandler.Clientset)
nodeController.enterPartialDisruptionFunc = func(nodeNum int) float32 {
return testRateLimiterQPS
}
nodeController.enterFullDisruptionFunc = func(nodeNum int) float32 {
return testRateLimiterQPS
}
if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("%v: unexpected error: %v", item.description, err)
}
for zone, state := range item.expectedInitialStates {
if state != nodeController.zoneStates[zone] {
t.Errorf("%v: Unexpected zone state: %v: %v instead %v", item.description, zone, nodeController.zoneStates[zone], state)
}
}
nodeController.now = func() metav1.Time { return metav1.Time{Time: fakeNow.Add(timeToPass)} }
for i := range item.updatedNodeStatuses {
fakeNodeHandler.Existing[i].Status = item.updatedNodeStatuses[i]
}
if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("%v: unexpected error: %v", item.description, err)
}
for zone, state := range item.expectedFollowingStates {
if state != nodeController.zoneStates[zone] {
t.Errorf("%v: Unexpected zone state: %v: %v instead %v", item.description, zone, nodeController.zoneStates[zone], state)
}
}
var podEvicted bool
start := time.Now()
// Infinite loop, used for retrying in case ratelimiter fails to reload for Try function.
// this breaks when we have the status that we need for test case or when we don't see the
// intended result after 1 minute.
logger, _ := ktesting.NewTestContext(t)
for {
podEvicted = nodeController.doEviction(logger, fakeNodeHandler)
if podEvicted == item.expectedEvictPods || time.Since(start) > 1*time.Minute {
break
}
}
if item.expectedEvictPods != podEvicted {
t.Errorf("%v: expected pod eviction: %+v, got %+v", item.description, item.expectedEvictPods, podEvicted)
}
}
}
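The deleted table-driven test above asserted zone states such as stateFullDisruption and statePartialDisruption against a 0.55 unhealthy-zone threshold. Roughly: a zone with no ready nodes is fully disrupted, a zone whose not-ready fraction reaches the threshold is partially disrupted, and anything else is normal. The sketch below restates that rule as standalone arithmetic; it is an approximation of the controller's behaviour, not its actual code.

package main

import "fmt"

// Approximate restatement of the zone classification the removed test
// exercised; threshold 0.55 matches the test's unhealthy-zone threshold.
func classify(notReady, total int, threshold float64) string {
	switch {
	case total > 0 && notReady == total:
		return "FullDisruption"
	case total > 0 && float64(notReady)/float64(total) >= threshold:
		return "PartialDisruption"
	default:
		return "Normal"
	}
}

func main() {
	fmt.Println(classify(2, 2, 0.55)) // both zone nodes unknown -> FullDisruption
	fmt.Println(classify(3, 5, 0.55)) // 3 of 5 not ready (0.6)  -> PartialDisruption
	fmt.Println(classify(1, 5, 0.55)) // 1 of 5 not ready (0.2)  -> Normal
}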
func TestMonitorNodeHealthUpdateStatus(t *testing.T) { func TestMonitorNodeHealthUpdateStatus(t *testing.T) {
fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC) fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
table := []struct { table := []struct {
@ -1707,7 +1106,6 @@ func TestMonitorNodeHealthUpdateStatus(t *testing.T) {
nodeController, _ := newNodeLifecycleControllerFromClient( nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(), context.TODO(),
item.fakeNodeHandler, item.fakeNodeHandler,
5*time.Minute,
testRateLimiterQPS, testRateLimiterQPS,
testRateLimiterQPS, testRateLimiterQPS,
testLargeClusterThreshold, testLargeClusterThreshold,
@ -1715,7 +1113,7 @@ func TestMonitorNodeHealthUpdateStatus(t *testing.T) {
testNodeMonitorGracePeriod, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeStartupGracePeriod,
testNodeMonitorPeriod, testNodeMonitorPeriod,
false) )
nodeController.now = func() metav1.Time { return fakeNow } nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder() nodeController.recorder = testutil.NewFakeRecorder()
nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(item.fakeNodeHandler.Clientset) nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(item.fakeNodeHandler.Clientset)
@ -2251,7 +1649,6 @@ func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) {
nodeController, _ := newNodeLifecycleControllerFromClient( nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(), context.TODO(),
item.fakeNodeHandler, item.fakeNodeHandler,
5*time.Minute,
testRateLimiterQPS, testRateLimiterQPS,
testRateLimiterQPS, testRateLimiterQPS,
testLargeClusterThreshold, testLargeClusterThreshold,
@ -2259,7 +1656,7 @@ func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) {
testNodeMonitorGracePeriod, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeStartupGracePeriod,
testNodeMonitorPeriod, testNodeMonitorPeriod,
false) )
nodeController.now = func() metav1.Time { return fakeNow } nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder() nodeController.recorder = testutil.NewFakeRecorder()
nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(item.fakeNodeHandler.Clientset) nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(item.fakeNodeHandler.Clientset)
@ -2416,7 +1813,6 @@ func TestMonitorNodeHealthMarkPodsNotReady(t *testing.T) {
nodeController, _ := newNodeLifecycleControllerFromClient( nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(), context.TODO(),
item.fakeNodeHandler, item.fakeNodeHandler,
5*time.Minute,
testRateLimiterQPS, testRateLimiterQPS,
testRateLimiterQPS, testRateLimiterQPS,
testLargeClusterThreshold, testLargeClusterThreshold,
@ -2424,7 +1820,7 @@ func TestMonitorNodeHealthMarkPodsNotReady(t *testing.T) {
testNodeMonitorGracePeriod, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeStartupGracePeriod,
testNodeMonitorPeriod, testNodeMonitorPeriod,
false) )
nodeController.now = func() metav1.Time { return fakeNow } nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder() nodeController.recorder = testutil.NewFakeRecorder()
nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(item.fakeNodeHandler.Clientset) nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(item.fakeNodeHandler.Clientset)
@ -2518,15 +1914,13 @@ func TestMonitorNodeHealthMarkPodsNotReadyWithWorkerSize(t *testing.T) {
nodeController, _ := newNodeLifecycleControllerFromClient( nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(), context.TODO(),
fakeNodeHandler, fakeNodeHandler,
5*time.Minute,
testRateLimiterQPS, testRateLimiterQPS,
testRateLimiterQPS, testRateLimiterQPS,
testLargeClusterThreshold, testLargeClusterThreshold,
testUnhealthyThreshold, testUnhealthyThreshold,
testNodeMonitorGracePeriod, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeStartupGracePeriod,
testNodeMonitorPeriod, testNodeMonitorPeriod)
false)
nodeController.now = func() metav1.Time { return fakeNow } nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder() nodeController.recorder = testutil.NewFakeRecorder()
nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(fakeNodeHandler.Clientset) nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(fakeNodeHandler.Clientset)
@ -2722,7 +2116,6 @@ func TestMonitorNodeHealthMarkPodsNotReadyRetry(t *testing.T) {
nodeController, _ := newNodeLifecycleControllerFromClient( nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(), context.TODO(),
item.fakeNodeHandler, item.fakeNodeHandler,
5*time.Minute,
testRateLimiterQPS, testRateLimiterQPS,
testRateLimiterQPS, testRateLimiterQPS,
testLargeClusterThreshold, testLargeClusterThreshold,
@ -2730,7 +2123,7 @@ func TestMonitorNodeHealthMarkPodsNotReadyRetry(t *testing.T) {
testNodeMonitorGracePeriod, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeStartupGracePeriod,
testNodeMonitorPeriod, testNodeMonitorPeriod,
false) )
if item.updateReactor != nil { if item.updateReactor != nil {
item.fakeNodeHandler.Clientset.PrependReactor("update", "pods", item.updateReactor) item.fakeNodeHandler.Clientset.PrependReactor("update", "pods", item.updateReactor)
} }
@ -2770,7 +2163,6 @@ func TestApplyNoExecuteTaints(t *testing.T) {
t.Skip("Skipping test on Windows.") t.Skip("Skipping test on Windows.")
} }
fakeNow := metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC) fakeNow := metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC)
evictionTimeout := 10 * time.Minute
fakeNodeHandler := &testutil.FakeNodeHandler{ fakeNodeHandler := &testutil.FakeNodeHandler{
Existing: []*v1.Node{ Existing: []*v1.Node{
@ -2861,7 +2253,6 @@ func TestApplyNoExecuteTaints(t *testing.T) {
nodeController, _ := newNodeLifecycleControllerFromClient( nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(), context.TODO(),
fakeNodeHandler, fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS, testRateLimiterQPS,
testRateLimiterQPS, testRateLimiterQPS,
testLargeClusterThreshold, testLargeClusterThreshold,
@ -2869,7 +2260,7 @@ func TestApplyNoExecuteTaints(t *testing.T) {
testNodeMonitorGracePeriod, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeStartupGracePeriod,
testNodeMonitorPeriod, testNodeMonitorPeriod,
true) )
nodeController.now = func() metav1.Time { return fakeNow } nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder() nodeController.recorder = testutil.NewFakeRecorder()
nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(fakeNodeHandler.Clientset) nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(fakeNodeHandler.Clientset)
@ -2926,7 +2317,6 @@ func TestApplyNoExecuteTaints(t *testing.T) {
// TestApplyNoExecuteTaintsToNodesEnqueueTwice ensures we taint every node with NoExecute even if enqueued twice // TestApplyNoExecuteTaintsToNodesEnqueueTwice ensures we taint every node with NoExecute even if enqueued twice
func TestApplyNoExecuteTaintsToNodesEnqueueTwice(t *testing.T) { func TestApplyNoExecuteTaintsToNodesEnqueueTwice(t *testing.T) {
fakeNow := metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC) fakeNow := metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC)
evictionTimeout := 10 * time.Minute
fakeNodeHandler := &testutil.FakeNodeHandler{ fakeNodeHandler := &testutil.FakeNodeHandler{
Existing: []*v1.Node{ Existing: []*v1.Node{
@ -3016,7 +2406,6 @@ func TestApplyNoExecuteTaintsToNodesEnqueueTwice(t *testing.T) {
nodeController, _ := newNodeLifecycleControllerFromClient( nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(), context.TODO(),
fakeNodeHandler, fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS, testRateLimiterQPS,
testRateLimiterQPS, testRateLimiterQPS,
testLargeClusterThreshold, testLargeClusterThreshold,
@ -3024,7 +2413,7 @@ func TestApplyNoExecuteTaintsToNodesEnqueueTwice(t *testing.T) {
testNodeMonitorGracePeriod, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeStartupGracePeriod,
testNodeMonitorPeriod, testNodeMonitorPeriod,
true) )
nodeController.now = func() metav1.Time { return fakeNow } nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder() nodeController.recorder = testutil.NewFakeRecorder()
nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(fakeNodeHandler.Clientset) nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(fakeNodeHandler.Clientset)
@ -3159,7 +2548,6 @@ func TestApplyNoExecuteTaintsToNodesEnqueueTwice(t *testing.T) {
func TestSwapUnreachableNotReadyTaints(t *testing.T) { func TestSwapUnreachableNotReadyTaints(t *testing.T) {
fakeNow := metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC) fakeNow := metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC)
evictionTimeout := 10 * time.Minute
fakeNodeHandler := &testutil.FakeNodeHandler{ fakeNodeHandler := &testutil.FakeNodeHandler{
Existing: []*v1.Node{ Existing: []*v1.Node{
@ -3213,7 +2601,6 @@ func TestSwapUnreachableNotReadyTaints(t *testing.T) {
}, },
Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}), Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}),
} }
timeToPass := evictionTimeout
newNodeStatus := v1.NodeStatus{ newNodeStatus := v1.NodeStatus{
Conditions: []v1.NodeCondition{ Conditions: []v1.NodeCondition{
{ {
@ -3241,7 +2628,6 @@ func TestSwapUnreachableNotReadyTaints(t *testing.T) {
nodeController, _ := newNodeLifecycleControllerFromClient( nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(), context.TODO(),
fakeNodeHandler, fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS, testRateLimiterQPS,
testRateLimiterQPS, testRateLimiterQPS,
testLargeClusterThreshold, testLargeClusterThreshold,
@ -3249,7 +2635,7 @@ func TestSwapUnreachableNotReadyTaints(t *testing.T) {
testNodeMonitorGracePeriod, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeStartupGracePeriod,
testNodeMonitorPeriod, testNodeMonitorPeriod,
true) )
nodeController.now = func() metav1.Time { return fakeNow } nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder() nodeController.recorder = testutil.NewFakeRecorder()
nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(fakeNodeHandler.Clientset) nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(fakeNodeHandler.Clientset)
@ -3276,7 +2662,7 @@ func TestSwapUnreachableNotReadyTaints(t *testing.T) {
t.Errorf("Can't find taint %v in %v", originalTaint, node0.Spec.Taints) t.Errorf("Can't find taint %v in %v", originalTaint, node0.Spec.Taints)
} }
nodeController.now = func() metav1.Time { return metav1.Time{Time: fakeNow.Add(timeToPass)} } nodeController.now = func() metav1.Time { return metav1.Time{Time: fakeNow.Time} }
node0.Status = newNodeStatus node0.Status = newNodeStatus
node1.Status = healthyNodeNewStatus node1.Status = healthyNodeNewStatus
@ -3313,7 +2699,6 @@ func TestSwapUnreachableNotReadyTaints(t *testing.T) {
func TestTaintsNodeByCondition(t *testing.T) { func TestTaintsNodeByCondition(t *testing.T) {
fakeNow := metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC) fakeNow := metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC)
evictionTimeout := 10 * time.Minute
fakeNodeHandler := &testutil.FakeNodeHandler{ fakeNodeHandler := &testutil.FakeNodeHandler{
Existing: []*v1.Node{ Existing: []*v1.Node{
@ -3346,7 +2731,6 @@ func TestTaintsNodeByCondition(t *testing.T) {
nodeController, _ := newNodeLifecycleControllerFromClient( nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(), context.TODO(),
fakeNodeHandler, fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS, testRateLimiterQPS,
testRateLimiterQPS, testRateLimiterQPS,
testLargeClusterThreshold, testLargeClusterThreshold,
@ -3354,7 +2738,7 @@ func TestTaintsNodeByCondition(t *testing.T) {
testNodeMonitorGracePeriod, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeStartupGracePeriod,
testNodeMonitorPeriod, testNodeMonitorPeriod,
true) )
nodeController.now = func() metav1.Time { return fakeNow } nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder() nodeController.recorder = testutil.NewFakeRecorder()
nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(fakeNodeHandler.Clientset) nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(fakeNodeHandler.Clientset)
@ -3549,7 +2933,6 @@ func TestNodeEventGeneration(t *testing.T) {
nodeController, _ := newNodeLifecycleControllerFromClient( nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(), context.TODO(),
fakeNodeHandler, fakeNodeHandler,
5*time.Minute,
testRateLimiterQPS, testRateLimiterQPS,
testRateLimiterQPS, testRateLimiterQPS,
testLargeClusterThreshold, testLargeClusterThreshold,
@ -3557,7 +2940,7 @@ func TestNodeEventGeneration(t *testing.T) {
testNodeMonitorGracePeriod, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeStartupGracePeriod,
testNodeMonitorPeriod, testNodeMonitorPeriod,
false) )
nodeController.now = func() metav1.Time { return fakeNow } nodeController.now = func() metav1.Time { return fakeNow }
fakeRecorder := testutil.NewFakeRecorder() fakeRecorder := testutil.NewFakeRecorder()
nodeController.recorder = fakeRecorder nodeController.recorder = fakeRecorder
@ -3590,7 +2973,6 @@ func TestNodeEventGeneration(t *testing.T) {
func TestReconcileNodeLabels(t *testing.T) { func TestReconcileNodeLabels(t *testing.T) {
fakeNow := metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC) fakeNow := metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC)
evictionTimeout := 10 * time.Minute
fakeNodeHandler := &testutil.FakeNodeHandler{ fakeNodeHandler := &testutil.FakeNodeHandler{
Existing: []*v1.Node{ Existing: []*v1.Node{
@ -3623,7 +3005,6 @@ func TestReconcileNodeLabels(t *testing.T) {
nodeController, _ := newNodeLifecycleControllerFromClient( nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(), context.TODO(),
fakeNodeHandler, fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS, testRateLimiterQPS,
testRateLimiterQPS, testRateLimiterQPS,
testLargeClusterThreshold, testLargeClusterThreshold,
@ -3631,7 +3012,7 @@ func TestReconcileNodeLabels(t *testing.T) {
testNodeMonitorGracePeriod, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeStartupGracePeriod,
testNodeMonitorPeriod, testNodeMonitorPeriod,
true) )
nodeController.now = func() metav1.Time { return fakeNow } nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder() nodeController.recorder = testutil.NewFakeRecorder()
nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(fakeNodeHandler.Clientset) nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(fakeNodeHandler.Clientset)
@ -3740,7 +3121,6 @@ func TestReconcileNodeLabels(t *testing.T) {
func TestTryUpdateNodeHealth(t *testing.T) { func TestTryUpdateNodeHealth(t *testing.T) {
fakeNow := metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC) fakeNow := metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC)
fakeOld := metav1.Date(2016, 1, 1, 12, 0, 0, 0, time.UTC) fakeOld := metav1.Date(2016, 1, 1, 12, 0, 0, 0, time.UTC)
evictionTimeout := 10 * time.Minute
fakeNodeHandler := &testutil.FakeNodeHandler{ fakeNodeHandler := &testutil.FakeNodeHandler{
Existing: []*v1.Node{ Existing: []*v1.Node{
@ -3767,7 +3147,6 @@ func TestTryUpdateNodeHealth(t *testing.T) {
nodeController, _ := newNodeLifecycleControllerFromClient( nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(), context.TODO(),
fakeNodeHandler, fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS, testRateLimiterQPS,
testRateLimiterQPS, testRateLimiterQPS,
testLargeClusterThreshold, testLargeClusterThreshold,
@ -3775,7 +3154,7 @@ func TestTryUpdateNodeHealth(t *testing.T) {
testNodeMonitorGracePeriod, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeStartupGracePeriod,
testNodeMonitorPeriod, testNodeMonitorPeriod,
true) )
nodeController.now = func() metav1.Time { return fakeNow } nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder() nodeController.recorder = testutil.NewFakeRecorder()
nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(fakeNodeHandler.Clientset) nodeController.getPodsAssignedToNode = fakeGetPodsAssignedToNode(fakeNodeHandler.Clientset)

View File

@ -52554,13 +52554,6 @@ func schema_k8sio_kube_controller_manager_config_v1alpha1_NodeLifecycleControlle
Description: "NodeLifecycleControllerConfiguration contains elements describing NodeLifecycleController.", Description: "NodeLifecycleControllerConfiguration contains elements describing NodeLifecycleController.",
Type: []string{"object"}, Type: []string{"object"},
Properties: map[string]spec.Schema{ Properties: map[string]spec.Schema{
"EnableTaintManager": {
SchemaProps: spec.SchemaProps{
Description: "If set to true enables NoExecute Taints and will evict all not-tolerating Pod running on Nodes tainted with this kind of Taints.",
Type: []string{"boolean"},
Format: "",
},
},
"NodeEvictionRate": { "NodeEvictionRate": {
SchemaProps: spec.SchemaProps{ SchemaProps: spec.SchemaProps{
Description: "nodeEvictionRate is the number of nodes per second on which pods are deleted in case of node failure when a zone is healthy", Description: "nodeEvictionRate is the number of nodes per second on which pods are deleted in case of node failure when a zone is healthy",
@ -52615,7 +52608,7 @@ func schema_k8sio_kube_controller_manager_config_v1alpha1_NodeLifecycleControlle
}, },
}, },
}, },
Required: []string{"EnableTaintManager", "NodeEvictionRate", "SecondaryNodeEvictionRate", "NodeStartupGracePeriod", "NodeMonitorGracePeriod", "PodEvictionTimeout", "LargeClusterSizeThreshold", "UnhealthyZoneThreshold"}, Required: []string{"NodeEvictionRate", "SecondaryNodeEvictionRate", "NodeStartupGracePeriod", "NodeMonitorGracePeriod", "PodEvictionTimeout", "LargeClusterSizeThreshold", "UnhealthyZoneThreshold"},
}, },
}, },
Dependencies: []string{ Dependencies: []string{
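The schema text kept here describes NodeEvictionRate as nodes per second. Because the rate is typically fractional, it is easier to read as an interval between nodes; for instance the usual kube-controller-manager default of 0.1 nodes/sec (stated as general Kubernetes knowledge, not taken from this diff) spaces evictions ten seconds apart. A quick self-contained check:

package main

import (
	"fmt"
	"time"
)

// Convert a "nodes per second" eviction rate into the spacing between nodes.
func interval(nodesPerSecond float64) time.Duration {
	return time.Duration(float64(time.Second) / nodesPerSecond)
}

func main() {
	fmt.Println(interval(0.1)) // 10s  (assumed default rate, see note above)
	fmt.Println(interval(100)) // 10ms (the limiter QPS used by the integration tests below)
}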

View File

@ -383,9 +383,6 @@ type NodeIPAMControllerConfiguration struct {
// NodeLifecycleControllerConfiguration contains elements describing NodeLifecycleController. // NodeLifecycleControllerConfiguration contains elements describing NodeLifecycleController.
type NodeLifecycleControllerConfiguration struct { type NodeLifecycleControllerConfiguration struct {
// If set to true enables NoExecute Taints and will evict all not-tolerating
// Pod running on Nodes tainted with this kind of Taints.
EnableTaintManager *bool
// nodeEvictionRate is the number of nodes per second on which pods are deleted in case of node failure when a zone is healthy // nodeEvictionRate is the number of nodes per second on which pods are deleted in case of node failure when a zone is healthy
NodeEvictionRate float32 NodeEvictionRate float32
// secondaryNodeEvictionRate is the number of nodes per second on which pods are deleted in case of node failure when a zone is unhealthy // secondaryNodeEvictionRate is the number of nodes per second on which pods are deleted in case of node failure when a zone is unhealthy
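After this removal the configuration type carries only value fields. Going by the field names in the generated schema's Required list earlier in the diff, its rough shape is the following; this is a sketch for orientation only, and the exact field types are assumptions rather than copies of the source.

package config // illustrative package name

import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

// Sketch of the struct after this change; field names come from the
// generated schema's Required list above, field types are assumed.
type NodeLifecycleControllerConfiguration struct {
	NodeEvictionRate          float32
	SecondaryNodeEvictionRate float32
	NodeStartupGracePeriod    metav1.Duration
	NodeMonitorGracePeriod    metav1.Duration
	PodEvictionTimeout        metav1.Duration
	LargeClusterSizeThreshold int32
	UnhealthyZoneThreshold    float32
}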

View File

@ -312,7 +312,7 @@ func (in *KubeControllerManagerConfiguration) DeepCopyInto(out *KubeControllerMa
out.CronJobController = in.CronJobController out.CronJobController = in.CronJobController
out.NamespaceController = in.NamespaceController out.NamespaceController = in.NamespaceController
out.NodeIPAMController = in.NodeIPAMController out.NodeIPAMController = in.NodeIPAMController
in.NodeLifecycleController.DeepCopyInto(&out.NodeLifecycleController) out.NodeLifecycleController = in.NodeLifecycleController
in.PersistentVolumeBinderController.DeepCopyInto(&out.PersistentVolumeBinderController) in.PersistentVolumeBinderController.DeepCopyInto(&out.PersistentVolumeBinderController)
out.PodGCController = in.PodGCController out.PodGCController = in.PodGCController
out.ReplicaSetController = in.ReplicaSetController out.ReplicaSetController = in.ReplicaSetController
@ -378,11 +378,6 @@ func (in *NodeIPAMControllerConfiguration) DeepCopy() *NodeIPAMControllerConfigu
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *NodeLifecycleControllerConfiguration) DeepCopyInto(out *NodeLifecycleControllerConfiguration) { func (in *NodeLifecycleControllerConfiguration) DeepCopyInto(out *NodeLifecycleControllerConfiguration) {
*out = *in *out = *in
if in.EnableTaintManager != nil {
in, out := &in.EnableTaintManager, &out.EnableTaintManager
*out = new(bool)
**out = **in
}
out.NodeStartupGracePeriod = in.NodeStartupGracePeriod out.NodeStartupGracePeriod = in.NodeStartupGracePeriod
out.NodeMonitorGracePeriod = in.NodeMonitorGracePeriod out.NodeMonitorGracePeriod = in.NodeMonitorGracePeriod
out.PodEvictionTimeout = in.PodEvictionTimeout out.PodEvictionTimeout = in.PodEvictionTimeout
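The generated deepcopy shrinks for a simple reason visible in the hunk above: EnableTaintManager was the struct's only pointer field, so once it is gone a plain struct assignment already copies everything, which is why the parent KubeControllerManagerConfiguration switches from DeepCopyInto to a direct assignment. A self-contained illustration of that pointer-versus-value distinction:

package main

import "fmt"

// Toy types showing why removing the lone *bool field lets generated code
// copy the struct by assignment: pointer fields alias, value fields do not.
type withPointer struct{ Enable *bool }
type valuesOnly struct{ Rate float32 }

func main() {
	t := true
	a := withPointer{Enable: &t}
	b := a                 // shallow copy: b.Enable aliases a.Enable
	*b.Enable = false      // ...so this also flips what a sees
	fmt.Println(*a.Enable) // false

	c := valuesOnly{Rate: 0.1}
	d := c              // value fields are copied outright
	d.Rate = 0.5        // c is unaffected
	fmt.Println(c.Rate) // 0.1
}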

View File

@ -126,12 +126,10 @@ func TestEvictionForNoExecuteTaintAddedByUser(t *testing.T) {
1*time.Second, // Node monitor grace period 1*time.Second, // Node monitor grace period
time.Minute, // Node startup grace period time.Minute, // Node startup grace period
time.Millisecond, // Node monitor period time.Millisecond, // Node monitor period
1, // Pod eviction timeout
100, // Eviction limiter QPS 100, // Eviction limiter QPS
100, // Secondary eviction limiter QPS 100, // Secondary eviction limiter QPS
50, // Large cluster threshold 50, // Large cluster threshold
0.55, // Unhealthy zone threshold 0.55, // Unhealthy zone threshold
true, // Run taint manager
) )
if err != nil { if err != nil {
t.Fatalf("Failed to create node controller: %v", err) t.Fatalf("Failed to create node controller: %v", err)
@ -279,12 +277,10 @@ func TestTaintBasedEvictions(t *testing.T) {
1*time.Second, // Node monitor grace period 1*time.Second, // Node monitor grace period
time.Minute, // Node startup grace period time.Minute, // Node startup grace period
time.Millisecond, // Node monitor period time.Millisecond, // Node monitor period
time.Second, // Pod eviction timeout
100, // Eviction limiter QPS 100, // Eviction limiter QPS
100, // Secondary eviction limiter QPS 100, // Secondary eviction limiter QPS
50, // Large cluster threshold 50, // Large cluster threshold
0.55, // Unhealthy zone threshold 0.55, // Unhealthy zone threshold
true, // Run taint manager
) )
if err != nil { if err != nil {
t.Fatalf("Failed to create node controller: %v", err) t.Fatalf("Failed to create node controller: %v", err)

View File

@ -95,12 +95,10 @@ func TestTaintNodeByCondition(t *testing.T) {
time.Hour, // Node monitor grace period time.Hour, // Node monitor grace period
time.Second, // Node startup grace period time.Second, // Node startup grace period
time.Second, // Node monitor period time.Second, // Node monitor period
time.Second, // Pod eviction timeout
100, // Eviction limiter QPS 100, // Eviction limiter QPS
100, // Secondary eviction limiter QPS 100, // Secondary eviction limiter QPS
100, // Large cluster threshold 100, // Large cluster threshold
100, // Unhealthy zone threshold 100, // Unhealthy zone threshold
true, // Run taint manager
) )
if err != nil { if err != nil {
t.Errorf("Failed to create node controller: %v", err) t.Errorf("Failed to create node controller: %v", err)