Mirror of https://github.com/k3s-io/kubernetes.git

Expose flags for new NodeEviction logic in NodeController

Commit 4cf698ef04 (parent 68327f76bf)
@@ -236,7 +236,7 @@ func StartControllers(s *options.CMServer, kubeClient *client.Client, kubeconfig
 		glog.Warningf("Unsuccessful parsing of service CIDR %v: %v", s.ServiceCIDR, err)
 	}
 	nodeController, err := nodecontroller.NewNodeController(sharedInformers.Pods().Informer(), cloud, clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "node-controller")),
-		s.PodEvictionTimeout.Duration, s.DeletingPodsQps, s.NodeMonitorGracePeriod.Duration,
+		s.PodEvictionTimeout.Duration, s.NodeEvictionRate, s.SecondaryNodeEvictionRate, s.LargeClusterSizeThreshold, s.UnhealthyZoneThreshold, s.NodeMonitorGracePeriod.Duration,
 		s.NodeStartupGracePeriod.Duration, s.NodeMonitorPeriod.Duration, clusterCIDR, serviceCIDR,
 		int(s.NodeCIDRMaskSize), s.AllocateNodeCIDRs)
 	if err != nil {
@@ -141,6 +141,7 @@ func (s *CMServer) AddFlags(fs *pflag.FlagSet) {
 	fs.DurationVar(&s.DeploymentControllerSyncPeriod.Duration, "deployment-controller-sync-period", s.DeploymentControllerSyncPeriod.Duration, "Period for syncing the deployments.")
 	fs.DurationVar(&s.PodEvictionTimeout.Duration, "pod-eviction-timeout", s.PodEvictionTimeout.Duration, "The grace period for deleting pods on failed nodes.")
 	fs.Float32Var(&s.DeletingPodsQps, "deleting-pods-qps", 0.1, "Number of nodes per second on which pods are deleted in case of node failure.")
+	fs.MarkDeprecated("deleting-pods-qps", "This flag is currently no-op and will be deleted.")
 	fs.Int32Var(&s.DeletingPodsBurst, "deleting-pods-burst", 0, "Number of nodes on which pods are bursty deleted in case of node failure. For more details look into RateLimiter.")
 	fs.MarkDeprecated("deleting-pods-burst", "This flag is currently no-op and will be deleted.")
 	fs.Int32Var(&s.RegisterRetryCount, "register-retry-count", s.RegisterRetryCount, ""+
@@ -173,5 +174,10 @@ func (s *CMServer) AddFlags(fs *pflag.FlagSet) {
 	fs.DurationVar(&s.ControllerStartInterval.Duration, "controller-start-interval", s.ControllerStartInterval.Duration, "Interval between starting controller managers.")
 	fs.BoolVar(&s.EnableGarbageCollector, "enable-garbage-collector", s.EnableGarbageCollector, "Enables the generic garbage collector. MUST be synced with the corresponding flag of the kube-apiserver. WARNING: the generic garbage collector is an alpha feature.")
 	fs.Int32Var(&s.ConcurrentGCSyncs, "concurrent-gc-syncs", s.ConcurrentGCSyncs, "The number of garbage collector workers that are allowed to sync concurrently.")
+	fs.Float32Var(&s.NodeEvictionRate, "node-eviction-rate", 0.1, "Number of nodes per second on which pods are deleted in case of node failure when a zone is healthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters.")
+	fs.Float32Var(&s.SecondaryNodeEvictionRate, "secondary-node-eviction-rate", 0.01, "Number of nodes per second on which pods are deleted in case of node failure when a zone is unhealthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters. This value is implicitly overridden to 0 if the cluster size is smaller than --large-cluster-size-threshold.")
+	fs.Int32Var(&s.LargeClusterSizeThreshold, "large-cluster-size-threshold", 50, "Number of nodes from which NodeController treats the cluster as large for the eviction logic purposes. --secondary-node-eviction-rate is implicitly overridden to 0 for clusters this size or smaller.")
+	fs.Float32Var(&s.UnhealthyZoneThreshold, "unhealthy-zone-threshold", 0.55, "Fraction of Nodes in a zone which needs to be not Ready (minimum 3) for zone to be treated as unhealthy. ")
+
 	leaderelection.BindFlags(&s.LeaderElection, fs)
 }
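Note: for orientation, the sketch below mirrors the flag-registration pattern above with github.com/spf13/pflag in a standalone program. The flag names and defaults match the additions in this hunk; the main wrapper and the printed summary are illustrative only and are not part of the commit.

package main

import (
	"fmt"

	"github.com/spf13/pflag"
)

func main() {
	// Same names and defaults as the flags added above; everything else is illustrative.
	nodeEvictionRate := pflag.Float32("node-eviction-rate", 0.1, "Eviction rate when the zone is healthy.")
	secondaryNodeEvictionRate := pflag.Float32("secondary-node-eviction-rate", 0.01, "Eviction rate when the zone is unhealthy.")
	largeClusterSizeThreshold := pflag.Int32("large-cluster-size-threshold", 50, "Cluster size above which the secondary rate applies.")
	unhealthyZoneThreshold := pflag.Float32("unhealthy-zone-threshold", 0.55, "Fraction of NotReady nodes for a zone to count as unhealthy.")
	pflag.Parse()

	fmt.Printf("healthy zone: %v evictions/s; unhealthy zone: %v evictions/s (only if cluster > %d nodes; unhealthy when NotReady fraction >= %v)\n",
		*nodeEvictionRate, *secondaryNodeEvictionRate, *largeClusterSizeThreshold, *unhealthyZoneThreshold)
}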
@@ -159,7 +159,7 @@ func (s *CMServer) Run(_ []string) error {
 	_, clusterCIDR, _ := net.ParseCIDR(s.ClusterCIDR)
 	_, serviceCIDR, _ := net.ParseCIDR(s.ServiceCIDR)
 	nodeController, err := nodecontroller.NewNodeControllerFromClient(cloud, clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "node-controller")),
-		s.PodEvictionTimeout.Duration, s.DeletingPodsQps,
+		s.PodEvictionTimeout.Duration, s.NodeEvictionRate, s.SecondaryNodeEvictionRate, s.LargeClusterSizeThreshold, s.UnhealthyZoneThreshold,
 		s.NodeMonitorGracePeriod.Duration, s.NodeStartupGracePeriod.Duration, s.NodeMonitorPeriod.Duration, clusterCIDR, serviceCIDR, int(s.NodeCIDRMaskSize), s.AllocateNodeCIDRs)
 	if err != nil {
 		glog.Fatalf("Failed to initialize nodecontroller: %v", err)
@@ -276,6 +276,7 @@ kubelet-sync-frequency
 kubelet-timeout
 kubernetes-service-node-port
 label-columns
+large-cluster-size-threshold
 last-release-pr
 leader-elect
 leader-elect-lease-duration
@@ -335,6 +336,7 @@ network-plugin-dir
 no-headers
 no-suggestions
 node-cidr-mask-size
+node-eviction-rate
 node-instance-group
 node-ip
 node-labels
@@ -432,6 +434,7 @@ scheduler-name
 schema-cache-dir
 scopes
 seccomp-profile-root
+secondary-node-eviction-rate
 secure-port
 serialize-image-pulls
 server-start-timeout
@@ -491,6 +494,7 @@ ttl-secs
 type-src
 udp-port
 udp-timeout
+unhealthy-zone-threshold
 unix-socket
 update-period
 upgrade-target
(File diff suppressed because it is too large.)
@@ -547,7 +547,7 @@ type KubeControllerManagerConfiguration struct {
 	DeploymentControllerSyncPeriod unversioned.Duration `json:"deploymentControllerSyncPeriod"`
 	// podEvictionTimeout is the grace period for deleting pods on failed nodes.
 	PodEvictionTimeout unversioned.Duration `json:"podEvictionTimeout"`
-	// deletingPodsQps is the number of nodes per second on which pods are deleted in
+	// DEPRECATED: deletingPodsQps is the number of nodes per second on which pods are deleted in
 	// case of node failure.
 	DeletingPodsQps float32 `json:"deletingPodsQps"`
 	// DEPRECATED: deletingPodsBurst is the number of nodes on which pods are bursty deleted in
@@ -613,6 +613,15 @@ type KubeControllerManagerConfiguration struct {
 	// concurrentGCSyncs is the number of garbage collector workers that are
 	// allowed to sync concurrently.
 	ConcurrentGCSyncs int32 `json:"concurrentGCSyncs"`
+	// nodeEvictionRate is the number of nodes per second on which pods are deleted in case of node failure when a zone is healthy
+	NodeEvictionRate float32 `json:"nodeEvictionRate"`
+	// secondaryNodeEvictionRate is the number of nodes per second on which pods are deleted in case of node failure when a zone is unhealty
+	SecondaryNodeEvictionRate float32 `json:"secondaryNodeEvictionRate"`
+	// secondaryNodeEvictionRate is implicitly overridden to 0 for clusters smaller than or equal to largeClusterSizeThreshold
+	LargeClusterSizeThreshold int32 `json:"largeClusterSizeThreshold"`
+	// Zone is treated as unhealthy in nodeEvictionRate and secondaryNodeEvictionRate when at least
+	// unhealthyZoneThreshold (no less than 3) of Nodes in the zone are NotReady
+	UnhealthyZoneThreshold float32 `json:"unhealthyZoneThreshold"`
 }

 // VolumeConfiguration contains *all* enumerated flags meant to configure all volume
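Note: the four new componentconfig fields serialize under the JSON keys given in their struct tags. A minimal, stand-alone sketch follows; the evictionConfig type is a trimmed stand-in invented for this example, not the real KubeControllerManagerConfiguration, and the values are the new flag defaults.

package main

import (
	"encoding/json"
	"fmt"
)

// evictionConfig mirrors only the four fields added above.
type evictionConfig struct {
	NodeEvictionRate          float32 `json:"nodeEvictionRate"`
	SecondaryNodeEvictionRate float32 `json:"secondaryNodeEvictionRate"`
	LargeClusterSizeThreshold int32   `json:"largeClusterSizeThreshold"`
	UnhealthyZoneThreshold    float32 `json:"unhealthyZoneThreshold"`
}

func main() {
	// Defaults matching the flags added in this commit.
	cfg := evictionConfig{
		NodeEvictionRate:          0.1,
		SecondaryNodeEvictionRate: 0.01,
		LargeClusterSizeThreshold: 50,
		UnhealthyZoneThreshold:    0.55,
	}
	out, _ := json.MarshalIndent(cfg, "", "  ")
	fmt.Println(string(out))
}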
@@ -119,6 +119,10 @@ func DeepCopy_componentconfig_KubeControllerManagerConfiguration(in interface{},
 		out.ControllerStartInterval = in.ControllerStartInterval
 		out.EnableGarbageCollector = in.EnableGarbageCollector
 		out.ConcurrentGCSyncs = in.ConcurrentGCSyncs
+		out.NodeEvictionRate = in.NodeEvictionRate
+		out.SecondaryNodeEvictionRate = in.SecondaryNodeEvictionRate
+		out.LargeClusterSizeThreshold = in.LargeClusterSizeThreshold
+		out.UnhealthyZoneThreshold = in.UnhealthyZoneThreshold
 		return nil
 	}
 }
@@ -40,31 +40,6 @@ const (
 	LargeClusterThreshold = 20
 )
-
-// This function is expected to get a slice of NodeReadyConditions for all Nodes in a given zone.
-// The zone is considered:
-// - fullyDisrupted if there're no Ready Nodes,
-// - partiallyDisrupted if more than 1/3 of Nodes (at least 3) are not Ready,
-// - normal otherwise
-func ComputeZoneState(nodeReadyConditions []*api.NodeCondition) zoneState {
-	readyNodes := 0
-	notReadyNodes := 0
-	for i := range nodeReadyConditions {
-		if nodeReadyConditions[i] != nil && nodeReadyConditions[i].Status == api.ConditionTrue {
-			readyNodes++
-		} else {
-			notReadyNodes++
-		}
-	}
-	switch {
-	case readyNodes == 0 && notReadyNodes > 0:
-		return stateFullDisruption
-	case notReadyNodes > 2 && 2*notReadyNodes > readyNodes:
-		return statePartialDisruption
-	default:
-		return stateNormal
-	}
-}

 // cleanupOrphanedPods deletes pods that are bound to nodes that don't
 // exist.
 func cleanupOrphanedPods(pods []*api.Pod, nodeStore cache.Store, forcefulDeletePodFunc func(*api.Pod) error) {
@@ -336,15 +311,3 @@ func terminatePods(kubeClient clientset.Interface, recorder record.EventRecorder
 	}
 	return complete, nextAttempt, nil
 }
-
-func HealthyQPSFunc(nodeNum int, defaultQPS float32) float32 {
-	return defaultQPS
-}
-
-// If the cluster is large make evictions slower, if they're small stop evictions altogether.
-func ReducedQPSFunc(nodeNum int, defaultQPS float32) float32 {
-	if nodeNum > LargeClusterThreshold {
-		return defaultQPS / 10
-	}
-	return 0
-}
@@ -120,7 +120,6 @@ type NodeController struct {
 	// workers that evicts pods from unresponsive nodes.
 	zonePodEvictor map[string]*RateLimitedTimedQueue
 	zoneTerminationEvictor map[string]*RateLimitedTimedQueue
-	evictionLimiterQPS float32
 	podEvictionTimeout time.Duration
 	// The maximum duration before a pod evicted from a node can be forcefully terminated.
 	maximumGracePeriod time.Duration
@@ -140,10 +139,14 @@ type NodeController struct {
 	forcefullyDeletePod func(*api.Pod) error
 	nodeExistsInCloudProvider func(string) (bool, error)
 	computeZoneStateFunc func(nodeConditions []*api.NodeCondition) zoneState
-	enterPartialDisruptionFunc func(nodeNum int, defaultQPS float32) float32
-	enterFullDisruptionFunc func(nodeNum int, defaultQPS float32) float32
+	enterPartialDisruptionFunc func(nodeNum int) float32
+	enterFullDisruptionFunc func(nodeNum int) float32

 	zoneStates map[string]zoneState
+	evictionLimiterQPS float32
+	secondaryEvictionLimiterQPS float32
+	largeClusterThreshold int32
+	unhealthyZoneThreshold float32

 	// internalPodInformer is used to hold a personal informer. If we're using
 	// a normal shared informer, then the informer will be started for us. If
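Note: enterPartialDisruptionFunc and enterFullDisruptionFunc are now function-typed fields that take only the node count, with the QPS values read from the controller's own new fields; keeping them as fields is what lets the tests further down substitute stubs. A generic, self-contained sketch of that pattern (all names here are invented for illustration, not taken from the commit):

package main

import "fmt"

// controller keeps its rate-picking policy as a function field so callers and
// tests can replace it, mirroring the enterPartialDisruptionFunc field above.
type controller struct {
	limiterQPS           float32
	partialDisruptionQPS func(nodeNum int) float32
}

func newController(qps float32) *controller {
	c := &controller{limiterQPS: qps}
	// Default policy is a method value bound to the instance, as in NewNodeController.
	c.partialDisruptionQPS = c.reducedQPS
	return c
}

func (c *controller) reducedQPS(nodeNum int) float32 {
	if nodeNum > 50 {
		return c.limiterQPS / 10
	}
	return 0
}

func main() {
	c := newController(0.1)
	fmt.Println(c.partialDisruptionQPS(100)) // default method: 0.01

	// A test can override the policy without touching the method set.
	c.partialDisruptionQPS = func(nodeNum int) float32 { return 10000 }
	fmt.Println(c.partialDisruptionQPS(100)) // stubbed: 10000
}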
@@ -163,6 +166,9 @@ func NewNodeController(
 	kubeClient clientset.Interface,
 	podEvictionTimeout time.Duration,
 	evictionLimiterQPS float32,
+	secondaryEvictionLimiterQPS float32,
+	largeClusterThreshold int32,
+	unhealthyZoneThreshold float32,
 	nodeMonitorGracePeriod time.Duration,
 	nodeStartupGracePeriod time.Duration,
 	nodeMonitorPeriod time.Duration,
@@ -214,12 +220,15 @@ func NewNodeController(
 		allocateNodeCIDRs: allocateNodeCIDRs,
 		forcefullyDeletePod: func(p *api.Pod) error { return forcefullyDeletePod(kubeClient, p) },
 		nodeExistsInCloudProvider: func(nodeName string) (bool, error) { return nodeExistsInCloudProvider(cloud, nodeName) },
-		enterPartialDisruptionFunc: ReducedQPSFunc,
-		enterFullDisruptionFunc: HealthyQPSFunc,
-		computeZoneStateFunc: ComputeZoneState,
 		evictionLimiterQPS: evictionLimiterQPS,
+		secondaryEvictionLimiterQPS: secondaryEvictionLimiterQPS,
+		largeClusterThreshold: largeClusterThreshold,
+		unhealthyZoneThreshold: unhealthyZoneThreshold,
 		zoneStates: make(map[string]zoneState),
 	}
+	nc.enterPartialDisruptionFunc = nc.ReducedQPSFunc
+	nc.enterFullDisruptionFunc = nc.HealthyQPSFunc
+	nc.computeZoneStateFunc = nc.ComputeZoneState

 	podInformer.AddEventHandler(framework.ResourceEventHandlerFuncs{
 		AddFunc: nc.maybeDeleteTerminatingPod,
@@ -336,6 +345,9 @@ func NewNodeControllerFromClient(
 	kubeClient clientset.Interface,
 	podEvictionTimeout time.Duration,
 	evictionLimiterQPS float32,
+	secondaryEvictionLimiterQPS float32,
+	largeClusterThreshold int32,
+	unhealthyZoneThreshold float32,
 	nodeMonitorGracePeriod time.Duration,
 	nodeStartupGracePeriod time.Duration,
 	nodeMonitorPeriod time.Duration,
@@ -344,8 +356,9 @@ func NewNodeControllerFromClient(
 	nodeCIDRMaskSize int,
 	allocateNodeCIDRs bool) (*NodeController, error) {
 	podInformer := informers.NewPodInformer(kubeClient, controller.NoResyncPeriodFunc())
-	nc, err := NewNodeController(podInformer, cloud, kubeClient, podEvictionTimeout, evictionLimiterQPS, nodeMonitorGracePeriod,
-		nodeStartupGracePeriod, nodeMonitorPeriod, clusterCIDR, serviceCIDR, nodeCIDRMaskSize, allocateNodeCIDRs)
+	nc, err := NewNodeController(podInformer, cloud, kubeClient, podEvictionTimeout, evictionLimiterQPS, secondaryEvictionLimiterQPS,
+		largeClusterThreshold, unhealthyZoneThreshold, nodeMonitorGracePeriod, nodeStartupGracePeriod, nodeMonitorPeriod, clusterCIDR,
+		serviceCIDR, nodeCIDRMaskSize, allocateNodeCIDRs)
 	if err != nil {
 		return nil, err
 	}
@@ -650,14 +663,14 @@ func (nc *NodeController) setLimiterInZone(zone string, zoneSize int, state zone
 		nc.zoneTerminationEvictor[zone].SwapLimiter(nc.evictionLimiterQPS)
 	case statePartialDisruption:
 		nc.zonePodEvictor[zone].SwapLimiter(
-			nc.enterPartialDisruptionFunc(zoneSize, nc.evictionLimiterQPS))
+			nc.enterPartialDisruptionFunc(zoneSize))
 		nc.zoneTerminationEvictor[zone].SwapLimiter(
-			nc.enterPartialDisruptionFunc(zoneSize, nc.evictionLimiterQPS))
+			nc.enterPartialDisruptionFunc(zoneSize))
 	case stateFullDisruption:
 		nc.zonePodEvictor[zone].SwapLimiter(
-			nc.enterFullDisruptionFunc(zoneSize, nc.evictionLimiterQPS))
+			nc.enterFullDisruptionFunc(zoneSize))
 		nc.zoneTerminationEvictor[zone].SwapLimiter(
-			nc.enterFullDisruptionFunc(zoneSize, nc.evictionLimiterQPS))
+			nc.enterFullDisruptionFunc(zoneSize))
 	}
 }

@@ -871,3 +884,41 @@ func (nc *NodeController) evictPods(node *api.Node) bool {
 	defer nc.evictorLock.Unlock()
 	return nc.zonePodEvictor[utilnode.GetZoneKey(node)].Add(node.Name, string(node.UID))
 }
+
+// Default value for cluster eviction rate - we take nodeNum for consistency with ReducedQPSFunc.
+func (nc *NodeController) HealthyQPSFunc(nodeNum int) float32 {
+	return nc.evictionLimiterQPS
+}
+
+// If the cluster is large make evictions slower, if they're small stop evictions altogether.
+func (nc *NodeController) ReducedQPSFunc(nodeNum int) float32 {
+	if int32(nodeNum) > nc.largeClusterThreshold {
+		return nc.secondaryEvictionLimiterQPS
+	}
+	return 0
+}
+
+// This function is expected to get a slice of NodeReadyConditions for all Nodes in a given zone.
+// The zone is considered:
+// - fullyDisrupted if there're no Ready Nodes,
+// - partiallyDisrupted if at least than nc.unhealthyZoneThreshold percent of Nodes are not Ready,
+// - normal otherwise
+func (nc *NodeController) ComputeZoneState(nodeReadyConditions []*api.NodeCondition) zoneState {
+	readyNodes := 0
+	notReadyNodes := 0
+	for i := range nodeReadyConditions {
+		if nodeReadyConditions[i] != nil && nodeReadyConditions[i].Status == api.ConditionTrue {
+			readyNodes++
+		} else {
+			notReadyNodes++
+		}
+	}
+	switch {
+	case readyNodes == 0 && notReadyNodes > 0:
+		return stateFullDisruption
+	case notReadyNodes > 2 && float32(notReadyNodes)/float32(notReadyNodes+readyNodes) >= nc.unhealthyZoneThreshold:
+		return statePartialDisruption
+	default:
+		return stateNormal
+	}
+}
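Note: to make the interplay of the new knobs concrete, the stand-alone sketch below re-implements the decision logic of ComputeZoneState, HealthyQPSFunc/ReducedQPSFunc, and setLimiterInZone on simplified local types. The constants use the new flag defaults (0.1, 0.01, 50, 0.55); everything else here is a stand-in written for this example, not code from the commit.

package main

import "fmt"

type zoneState string

const (
	stateNormal            zoneState = "Normal"
	statePartialDisruption zoneState = "PartialDisruption"
	stateFullDisruption    zoneState = "FullDisruption"
)

// Defaults matching --node-eviction-rate, --secondary-node-eviction-rate,
// --large-cluster-size-threshold, and --unhealthy-zone-threshold.
const (
	nodeEvictionRate          = float32(0.1)
	secondaryNodeEvictionRate = float32(0.01)
	largeClusterSizeThreshold = 50
	unhealthyZoneThreshold    = float32(0.55)
)

// computeZoneState mirrors NodeController.ComputeZoneState on ready/notReady counts.
func computeZoneState(readyNodes, notReadyNodes int) zoneState {
	switch {
	case readyNodes == 0 && notReadyNodes > 0:
		return stateFullDisruption
	case notReadyNodes > 2 && float32(notReadyNodes)/float32(notReadyNodes+readyNodes) >= unhealthyZoneThreshold:
		return statePartialDisruption
	default:
		return stateNormal
	}
}

// evictionQPS mirrors setLimiterInZone: a normal zone and a fully disrupted zone
// keep the primary rate (HealthyQPSFunc), while a partially disrupted zone falls
// back to the secondary rate only in large clusters (ReducedQPSFunc), otherwise 0.
func evictionQPS(state zoneState, zoneSize int) float32 {
	if state == statePartialDisruption {
		if zoneSize > largeClusterSizeThreshold {
			return secondaryNodeEvictionRate
		}
		return 0 // evictions stop entirely in small disrupted clusters
	}
	return nodeEvictionRate
}

func main() {
	for _, tc := range []struct{ ready, notReady int }{
		{ready: 97, notReady: 3},  // 3% NotReady: normal
		{ready: 40, notReady: 60}, // 60% NotReady: partial disruption
		{ready: 0, notReady: 10},  // nothing Ready: full disruption
	} {
		state := computeZoneState(tc.ready, tc.notReady)
		fmt.Printf("%d ready / %d notReady -> %s, eviction rate %v/s\n",
			tc.ready, tc.notReady, state, evictionQPS(state, tc.ready+tc.notReady))
	}
}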
@@ -36,6 +36,8 @@ const (
 	testNodeStartupGracePeriod = 60 * time.Second
 	testNodeMonitorPeriod = 5 * time.Second
 	testRateLimiterQPS = float32(10000)
+	testLargeClusterThreshold = 20
+	testUnhealtyThreshold = float32(0.55)
 )

 func TestMonitorNodeStatusEvictPods(t *testing.T) {
@@ -461,7 +463,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {

 	for _, item := range table {
 		nodeController, _ := NewNodeControllerFromClient(nil, item.fakeNodeHandler,
-			evictionTimeout, testRateLimiterQPS, testNodeMonitorGracePeriod,
+			evictionTimeout, testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold, testNodeMonitorGracePeriod,
 			testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
 		nodeController.now = func() unversioned.Time { return fakeNow }
 		for _, ds := range item.daemonSets {
@@ -978,13 +980,13 @@ func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) {
 			Clientset: fake.NewSimpleClientset(&api.PodList{Items: item.podList}),
 		}
 		nodeController, _ := NewNodeControllerFromClient(nil, fakeNodeHandler,
-			evictionTimeout, testRateLimiterQPS, testNodeMonitorGracePeriod,
+			evictionTimeout, testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold, testNodeMonitorGracePeriod,
 			testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
 		nodeController.now = func() unversioned.Time { return fakeNow }
-		nodeController.enterPartialDisruptionFunc = func(nodeNum int, defaultQPS float32) float32 {
+		nodeController.enterPartialDisruptionFunc = func(nodeNum int) float32 {
 			return testRateLimiterQPS
 		}
-		nodeController.enterFullDisruptionFunc = func(nodeNum int, defaultQPS float32) float32 {
+		nodeController.enterFullDisruptionFunc = func(nodeNum int) float32 {
 			return testRateLimiterQPS
 		}
 		if err := nodeController.monitorNodeStatus(); err != nil {
@@ -1071,7 +1073,7 @@ func TestCloudProviderNoRateLimit(t *testing.T) {
 		deleteWaitChan: make(chan struct{}),
 	}
 	nodeController, _ := NewNodeControllerFromClient(nil, fnh, 10*time.Minute,
-		testRateLimiterQPS,
+		testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold,
 		testNodeMonitorGracePeriod, testNodeStartupGracePeriod,
 		testNodeMonitorPeriod, nil, nil, 0, false)
 	nodeController.cloud = &fakecloud.FakeCloud{}
@@ -1304,7 +1306,8 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 	}

 	for i, item := range table {
-		nodeController, _ := NewNodeControllerFromClient(nil, item.fakeNodeHandler, 5*time.Minute, testRateLimiterQPS,
+		nodeController, _ := NewNodeControllerFromClient(nil, item.fakeNodeHandler, 5*time.Minute,
+			testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold,
 			testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
 		nodeController.now = func() unversioned.Time { return fakeNow }
 		if err := nodeController.monitorNodeStatus(); err != nil {
@@ -1454,7 +1457,8 @@ func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
 	}

 	for i, item := range table {
-		nodeController, _ := NewNodeControllerFromClient(nil, item.fakeNodeHandler, 5*time.Minute, testRateLimiterQPS,
+		nodeController, _ := NewNodeControllerFromClient(nil, item.fakeNodeHandler, 5*time.Minute,
+			testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold,
 			testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
 		nodeController.now = func() unversioned.Time { return fakeNow }
 		if err := nodeController.monitorNodeStatus(); err != nil {
@@ -1536,7 +1540,8 @@ func TestNodeDeletion(t *testing.T) {
 		Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0"), *newPod("pod1", "node1")}}),
 	}

-	nodeController, _ := NewNodeControllerFromClient(nil, fakeNodeHandler, 5*time.Minute, testRateLimiterQPS,
+	nodeController, _ := NewNodeControllerFromClient(nil, fakeNodeHandler, 5*time.Minute,
+		testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold,
 		testNodeMonitorGracePeriod, testNodeStartupGracePeriod,
 		testNodeMonitorPeriod, nil, nil, 0, false)
 	nodeController.now = func() unversioned.Time { return fakeNow }
|
|||||||
Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
|
Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
|
||||||
}
|
}
|
||||||
|
|
||||||
nodeController, _ := NewNodeControllerFromClient(nil, fakeNodeHandler, 5*time.Minute, testRateLimiterQPS,
|
nodeController, _ := NewNodeControllerFromClient(nil, fakeNodeHandler, 5*time.Minute,
|
||||||
testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
|
testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold,
|
||||||
|
testNodeMonitorGracePeriod, testNodeStartupGracePeriod,
|
||||||
|
testNodeMonitorPeriod, nil, nil, 0, false)
|
||||||
nodeController.now = func() unversioned.Time { return fakeNow }
|
nodeController.now = func() unversioned.Time { return fakeNow }
|
||||||
fakeRecorder := NewFakeRecorder()
|
fakeRecorder := NewFakeRecorder()
|
||||||
nodeController.recorder = fakeRecorder
|
nodeController.recorder = fakeRecorder
|
||||||
@ -1707,7 +1714,7 @@ func TestCheckPod(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
nc, _ := NewNodeControllerFromClient(nil, nil, 0, 0, 0, 0, 0, nil, nil, 0, false)
|
nc, _ := NewNodeControllerFromClient(nil, nil, 0, 0, 0, 0, 0, 0, 0, 0, nil, nil, 0, false)
|
||||||
nc.nodeStore.Store = cache.NewStore(cache.MetaNamespaceKeyFunc)
|
nc.nodeStore.Store = cache.NewStore(cache.MetaNamespaceKeyFunc)
|
||||||
nc.nodeStore.Store.Add(&api.Node{
|
nc.nodeStore.Store.Add(&api.Node{
|
||||||
ObjectMeta: api.ObjectMeta{
|
ObjectMeta: api.ObjectMeta{
|
||||||
@ -1774,7 +1781,7 @@ func TestCleanupOrphanedPods(t *testing.T) {
|
|||||||
newPod("b", "bar"),
|
newPod("b", "bar"),
|
||||||
newPod("c", "gone"),
|
newPod("c", "gone"),
|
||||||
}
|
}
|
||||||
nc, _ := NewNodeControllerFromClient(nil, nil, 0, 0, 0, 0, 0, nil, nil, 0, false)
|
nc, _ := NewNodeControllerFromClient(nil, nil, 0, 0, 0, 0, 0, 0, 0, 0, nil, nil, 0, false)
|
||||||
|
|
||||||
nc.nodeStore.Store.Add(newNode("foo"))
|
nc.nodeStore.Store.Add(newNode("foo"))
|
||||||
nc.nodeStore.Store.Add(newNode("bar"))
|
nc.nodeStore.Store.Add(newNode("bar"))
|
||||||