Move the TaintBasedEvictions feature gate to GA

Authored by skilxn-go on 2020-01-23 16:50:58 +08:00; committed by skilxn-go
parent 672aa55ee4
commit 6b8fc8dc5e
8 changed files with 13 additions and 43 deletions

View File

@@ -207,7 +207,6 @@ func startNodeLifecycleController(ctx ControllerContext) (http.Handler, bool, er
        ctx.ComponentConfig.NodeLifecycleController.LargeClusterSizeThreshold,
        ctx.ComponentConfig.NodeLifecycleController.UnhealthyZoneThreshold,
        ctx.ComponentConfig.NodeLifecycleController.EnableTaintManager,
-       utilfeature.DefaultFeatureGate.Enabled(features.TaintBasedEvictions),
    )
    if err != nil {
        return nil, true, err

View File

@@ -351,10 +351,6 @@ type Controller struct {
    // tainted nodes, if they're not tolerated.
    runTaintManager bool
-   // if set to true Controller will taint Nodes with 'TaintNodeNotReady' and 'TaintNodeUnreachable'
-   // taints instead of evicting Pods itself.
-   useTaintBasedEvictions bool
    nodeUpdateQueue workqueue.Interface
    podUpdateQueue  workqueue.RateLimitingInterface
}
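For orientation (not part of this patch): the field removed here was the switch described by the deleted comment. When taint-based eviction is active, the controller marks an unhealthy node with NoExecute taints and lets TaintManager evict the pods, rather than evicting them itself. A minimal standalone sketch of the two taints involved, using the taint key constants this commit also edits further down; the variable names are illustrative only:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	// Illustrative only: the NoExecute taints the node lifecycle controller
	// applies to nodes that are NotReady or unreachable.
	now := metav1.Now()
	notReady := v1.Taint{
		Key:       v1.TaintNodeNotReady, // "node.kubernetes.io/not-ready"
		Effect:    v1.TaintEffectNoExecute,
		TimeAdded: &now,
	}
	unreachable := v1.Taint{
		Key:       v1.TaintNodeUnreachable, // "node.kubernetes.io/unreachable"
		Effect:    v1.TaintEffectNoExecute,
		TimeAdded: &now,
	}
	fmt.Printf("%+v\n%+v\n", notReady, unreachable)
}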
@@ -375,7 +371,6 @@ func NewNodeLifecycleController(
    largeClusterThreshold int32,
    unhealthyZoneThreshold float32,
    runTaintManager bool,
-   useTaintBasedEvictions bool,
) (*Controller, error) {
    if kubeClient == nil {
@@ -416,13 +411,9 @@ func NewNodeLifecycleController(
        largeClusterThreshold:  largeClusterThreshold,
        unhealthyZoneThreshold: unhealthyZoneThreshold,
        runTaintManager:        runTaintManager,
-       useTaintBasedEvictions: useTaintBasedEvictions && runTaintManager,
        nodeUpdateQueue:        workqueue.NewNamed("node_lifecycle_controller"),
        podUpdateQueue:         workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "node_lifecycle_controller_pods"),
    }
-   if useTaintBasedEvictions {
-       klog.Infof("Controller is using taint based evictions.")
-   }
    nc.enterPartialDisruptionFunc = nc.ReducedQPSFunc
    nc.enterFullDisruptionFunc = nc.HealthyQPSFunc
@@ -580,7 +571,7 @@ func (nc *Controller) Run(stopCh <-chan struct{}) {
        go wait.Until(nc.doPodProcessingWorker, time.Second, stopCh)
    }
-   if nc.useTaintBasedEvictions {
+   if nc.runTaintManager {
        // Handling taint based evictions. Because we don't want a dedicated logic in TaintManager for NC-originated
        // taints and we normally don't rate limit evictions caused by taints, we need to rate limit adding taints.
        go wait.Until(nc.doNoExecuteTaintingPass, scheduler.NodeEvictionPeriod, stopCh)
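The comment above is the key design note: taint-based eviction deliberately throttles the tainting pass rather than the evictions themselves. A rough, hedged sketch of that pattern with a token-bucket limiter and a periodic worker; the nodesToTaint slice and the 0.1 QPS value are made up for illustration, while wait.Until and the flowcontrol limiter mirror what the controller actually uses:

package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/util/flowcontrol"
)

func main() {
	// At 0.1 QPS with burst 1, roughly one taint is allowed every ten seconds;
	// nodes that miss a token simply stay queued for a later pass.
	limiter := flowcontrol.NewTokenBucketRateLimiter(0.1, 1)
	nodesToTaint := []string{"node-a", "node-b", "node-c"}
	stopCh := make(chan struct{})

	taintingPass := func() {
		for _, node := range nodesToTaint {
			if !limiter.TryAccept() {
				return // out of tokens; retry on the next pass
			}
			fmt.Printf("would add NoExecute taint to %s\n", node)
		}
	}

	// The controller drives its real pass the same way: go wait.Until(pass, period, stopCh).
	go wait.Until(taintingPass, 200*time.Millisecond, stopCh)
	time.Sleep(1 * time.Second)
	close(stopCh)
}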
@@ -768,9 +759,7 @@ func (nc *Controller) doEvictionPass() {
// monitorNodeHealth verifies node health are constantly updated by kubelet, and
// if not, post "NodeReady==ConditionUnknown".
-// For nodes who are not ready or not reachable for a long period of time.
-// This function will taint them if TaintBasedEvictions feature was enabled.
-// Otherwise, it would evict it directly.
+// This function will taint nodes who are not ready or not reachable for a long period of time.
func (nc *Controller) monitorNodeHealth() error {
    // We are listing nodes from local cache as we can tolerate some small delays
    // comparing to state from etcd and there is eventual consistency anyway.
@@ -789,7 +778,7 @@ func (nc *Controller) monitorNodeHealth() error {
        nodeutil.RecordNodeEvent(nc.recorder, added[i].Name, string(added[i].UID), v1.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in Controller", added[i].Name))
        nc.knownNodeSet[added[i].Name] = added[i]
        nc.addPodEvictorForNewZone(added[i])
-       if nc.useTaintBasedEvictions {
+       if nc.runTaintManager {
            nc.markNodeAsReachable(added[i])
        } else {
            nc.cancelPodEviction(added[i])
@@ -843,7 +832,7 @@ func (nc *Controller) monitorNodeHealth() error {
            }
            continue
        }
-       if nc.useTaintBasedEvictions {
+       if nc.runTaintManager {
            nc.processTaintBaseEviction(node, &observedReadyCondition)
        } else {
            if err := nc.processNoTaintBaseEviction(node, &observedReadyCondition, gracePeriod, pods); err != nil {
@@ -1209,7 +1198,7 @@ func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.Nod
    if allAreFullyDisrupted {
        klog.V(0).Info("Controller detected that all Nodes are not-Ready. Entering master disruption mode.")
        for i := range nodes {
-           if nc.useTaintBasedEvictions {
+           if nc.runTaintManager {
                _, err := nc.markNodeAsReachable(nodes[i])
                if err != nil {
                    klog.Errorf("Failed to remove taints from Node %v", nodes[i].Name)
@@ -1220,7 +1209,7 @@ func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.Nod
        }
        // We stop all evictions.
        for k := range nc.zoneStates {
-           if nc.useTaintBasedEvictions {
+           if nc.runTaintManager {
                nc.zoneNoExecuteTainter[k].SwapLimiter(0)
            } else {
                nc.zonePodEvictor[k].SwapLimiter(0)
@@ -1332,7 +1321,7 @@ func (nc *Controller) processPod(podItem podUpdateItem) {
    pods := []*v1.Pod{pod}
    // In taint-based eviction mode, only node updates are processed by NodeLifecycleController.
    // Pods are processed by TaintManager.
-   if !nc.useTaintBasedEvictions {
+   if !nc.runTaintManager {
        if err := nc.processNoTaintBaseEviction(node, currentReadyCondition, nc.nodeMonitorGracePeriod, pods); err != nil {
            klog.Warningf("Unable to process pod %+v eviction from node %v: %v.", podItem, nodeName, err)
            nc.podUpdateQueue.AddRateLimited(podItem)
@@ -1351,13 +1340,13 @@ func (nc *Controller) processPod(podItem podUpdateItem) {
func (nc *Controller) setLimiterInZone(zone string, zoneSize int, state ZoneState) {
    switch state {
    case stateNormal:
-       if nc.useTaintBasedEvictions {
+       if nc.runTaintManager {
            nc.zoneNoExecuteTainter[zone].SwapLimiter(nc.evictionLimiterQPS)
        } else {
            nc.zonePodEvictor[zone].SwapLimiter(nc.evictionLimiterQPS)
        }
    case statePartialDisruption:
-       if nc.useTaintBasedEvictions {
+       if nc.runTaintManager {
            nc.zoneNoExecuteTainter[zone].SwapLimiter(
                nc.enterPartialDisruptionFunc(zoneSize))
        } else {
@@ -1365,7 +1354,7 @@ func (nc *Controller) setLimiterInZone(zone string, zoneSize int, state ZoneStat
                nc.enterPartialDisruptionFunc(zoneSize))
        }
    case stateFullDisruption:
-       if nc.useTaintBasedEvictions {
+       if nc.runTaintManager {
            nc.zoneNoExecuteTainter[zone].SwapLimiter(
                nc.enterFullDisruptionFunc(zoneSize))
        } else {
@@ -1431,7 +1420,7 @@ func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) {
    zone := utilnode.GetZoneKey(node)
    if _, found := nc.zoneStates[zone]; !found {
        nc.zoneStates[zone] = stateInitial
-       if !nc.useTaintBasedEvictions {
+       if !nc.runTaintManager {
            nc.zonePodEvictor[zone] =
                scheduler.NewRateLimitedTimedQueue(
                    flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, scheduler.EvictionRateLimiterBurst))

View File

@@ -180,7 +180,6 @@ func newNodeLifecycleControllerFromClient(
        largeClusterThreshold,
        unhealthyZoneThreshold,
        useTaints,
-       useTaints,
    )
    if err != nil {
        return nil, err

View File

@@ -61,6 +61,7 @@ const (
    // owner: @Huang-Wei
    // beta: v1.13
+   // ga: v1.18
    //
    // Changes the logic behind evicting Pods from not ready Nodes
    // to take advantage of NoExecute Taints and Tolerations.
@@ -592,7 +593,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
    DynamicKubeletConfig:           {Default: true, PreRelease: featuregate.Beta},
    ExperimentalHostUserNamespaceDefaultingGate: {Default: false, PreRelease: featuregate.Beta},
    DevicePlugins:                  {Default: true, PreRelease: featuregate.Beta},
-   TaintBasedEvictions:            {Default: true, PreRelease: featuregate.Beta},
+   TaintBasedEvictions:            {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.19
    RotateKubeletServerCertificate: {Default: true, PreRelease: featuregate.Beta},
    RotateKubeletClientCertificate: {Default: true, PreRelease: featuregate.Beta},
    LocalStorageCapacityIsolation:  {Default: true, PreRelease: featuregate.Beta},
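The GA entry above also sets LockToDefault, so the gate can no longer be turned off via --feature-gates; the component-base featuregate machinery rejects the override. A small sketch of that behavior using a throwaway gate object (the standalone gate here is for illustration only; the real one is the process-wide DefaultFeatureGate populated from the table above):

package main

import (
	"fmt"

	"k8s.io/component-base/featuregate"
)

func main() {
	const TaintBasedEvictions featuregate.Feature = "TaintBasedEvictions"

	gate := featuregate.NewFeatureGate()
	_ = gate.Add(map[featuregate.Feature]featuregate.FeatureSpec{
		TaintBasedEvictions: {Default: true, PreRelease: featuregate.GA, LockToDefault: true},
	})

	fmt.Println(gate.Enabled(TaintBasedEvictions)) // true

	// A locked GA gate refuses to be overridden instead of silently flipping.
	if err := gate.Set("TaintBasedEvictions=false"); err != nil {
		fmt.Println("rejected:", err)
	}
}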

View File

@@ -18,38 +18,31 @@ package v1
const (
    // TaintNodeNotReady will be added when node is not ready
-   // and feature-gate for TaintBasedEvictions flag is enabled,
    // and removed when node becomes ready.
    TaintNodeNotReady = "node.kubernetes.io/not-ready"

    // TaintNodeUnreachable will be added when node becomes unreachable
    // (corresponding to NodeReady status ConditionUnknown)
-   // and feature-gate for TaintBasedEvictions flag is enabled,
    // and removed when node becomes reachable (NodeReady status ConditionTrue).
    TaintNodeUnreachable = "node.kubernetes.io/unreachable"

    // TaintNodeUnschedulable will be added when node becomes unschedulable
-   // and feature-gate for TaintNodesByCondition flag is enabled,
    // and removed when node becomes scheduable.
    TaintNodeUnschedulable = "node.kubernetes.io/unschedulable"

    // TaintNodeMemoryPressure will be added when node has memory pressure
-   // and feature-gate for TaintNodesByCondition flag is enabled,
    // and removed when node has enough memory.
    TaintNodeMemoryPressure = "node.kubernetes.io/memory-pressure"

    // TaintNodeDiskPressure will be added when node has disk pressure
-   // and feature-gate for TaintNodesByCondition flag is enabled,
    // and removed when node has enough disk.
    TaintNodeDiskPressure = "node.kubernetes.io/disk-pressure"

    // TaintNodeNetworkUnavailable will be added when node's network is unavailable
-   // and feature-gate for TaintNodesByCondition flag is enabled,
    // and removed when network becomes ready.
    TaintNodeNetworkUnavailable = "node.kubernetes.io/network-unavailable"

    // TaintNodePIDPressure will be added when node has pid pressure
-   // and feature-gate for TaintNodesByCondition flag is enabled,
    // and removed when node has enough disk.
    TaintNodePIDPressure = "node.kubernetes.io/pid-pressure"
)
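These are the taint keys that pods tolerate in order to control how quickly they are evicted once a node is tainted; the defaulttolerationseconds admission plugin (listed in the test dependencies below) adds a 300-second toleration for the not-ready and unreachable taints by default. A hedged sketch of an explicit toleration, with the two-minute value chosen arbitrarily for illustration:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

func main() {
	// Tolerate the unreachable NoExecute taint for two minutes: the pod
	// survives that long on a tainted node before TaintManager evicts it.
	seconds := int64(120)
	toleration := v1.Toleration{
		Key:               v1.TaintNodeUnreachable, // "node.kubernetes.io/unreachable"
		Operator:          v1.TolerationOpExists,
		Effect:            v1.TaintEffectNoExecute,
		TolerationSeconds: &seconds,
	}
	fmt.Printf("%+v\n", toleration)
}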

View File

@@ -15,7 +15,6 @@ go_test(
    tags = ["integration"],
    deps = [
        "//pkg/controller/nodelifecycle:go_default_library",
-       "//pkg/features:go_default_library",
        "//plugin/pkg/admission/defaulttolerationseconds:go_default_library",
        "//plugin/pkg/admission/podtolerationrestriction:go_default_library",
        "//plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction:go_default_library",
@@ -25,11 +24,9 @@ go_test(
        "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
        "//staging/src/k8s.io/apiserver/pkg/admission:go_default_library",
-       "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
        "//staging/src/k8s.io/client-go/informers:go_default_library",
        "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
        "//staging/src/k8s.io/client-go/rest:go_default_library",
-       "//staging/src/k8s.io/component-base/featuregate/testing:go_default_library",
        "//test/e2e/framework/pod:go_default_library",
        "//test/integration/framework:go_default_library",
        "//test/integration/util:go_default_library",

View File

@@ -28,13 +28,10 @@ import (
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/runtime/schema"
    "k8s.io/apiserver/pkg/admission"
-   utilfeature "k8s.io/apiserver/pkg/util/feature"
    "k8s.io/client-go/informers"
    "k8s.io/client-go/kubernetes"
    restclient "k8s.io/client-go/rest"
-   featuregatetesting "k8s.io/component-base/featuregate/testing"
    "k8s.io/kubernetes/pkg/controller/nodelifecycle"
-   "k8s.io/kubernetes/pkg/features"
    "k8s.io/kubernetes/plugin/pkg/admission/defaulttolerationseconds"
    "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction"
    pluginapi "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction"
@@ -109,9 +106,6 @@ func TestTaintBasedEvictions(t *testing.T) {
        },
    }
-   // Enable TaintBasedEvictions
-   defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.TaintBasedEvictions, true)()
    // Build admission chain handler.
    podTolerations := podtolerationrestriction.NewPodTolerationsPlugin(&pluginapi.Configuration{})
    admission := admission.NewChainHandler(
@@ -156,7 +150,6 @@ func TestTaintBasedEvictions(t *testing.T) {
        50,   // Large cluster threshold
        0.55, // Unhealthy zone threshold
        true, // Run taint manager
-       true, // Use taint based evictions
    )
    if err != nil {
        t.Errorf("Failed to create node controller: %v", err)

View File

@@ -98,7 +98,6 @@ func TestTaintNodeByCondition(t *testing.T) {
        100,  // Large cluster threshold
        100,  // Unhealthy zone threshold
        true, // Run taint manager
-       true, // Use taint based evictions
    )
    if err != nil {
        t.Errorf("Failed to create node controller: %v", err)