diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go index 302a75250ee..cdcc1776762 100644 --- a/cmd/kube-controller-manager/app/controllermanager.go +++ b/cmd/kube-controller-manager/app/controllermanager.go @@ -57,7 +57,7 @@ import ( "k8s.io/component-base/featuregate" "k8s.io/component-base/logs" logsapi "k8s.io/component-base/logs/api/v1" - "k8s.io/component-base/metrics/features" + metricsfeatures "k8s.io/component-base/metrics/features" controllersmetrics "k8s.io/component-base/metrics/prometheus/controllers" "k8s.io/component-base/metrics/prometheus/slis" "k8s.io/component-base/term" @@ -75,12 +75,13 @@ import ( "k8s.io/kubernetes/cmd/kube-controller-manager/names" kubectrlmgrconfig "k8s.io/kubernetes/pkg/controller/apis/config" serviceaccountcontroller "k8s.io/kubernetes/pkg/controller/serviceaccount" + "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/serviceaccount" ) func init() { utilruntime.Must(logsapi.AddFeatureGates(utilfeature.DefaultMutableFeatureGate)) - utilruntime.Must(features.AddFeatureGates(utilfeature.DefaultMutableFeatureGate)) + utilruntime.Must(metricsfeatures.AddFeatureGates(utilfeature.DefaultMutableFeatureGate)) } const ( @@ -556,6 +557,10 @@ func NewControllerDescriptors() map[string]*ControllerDescriptor { register(newResourceClaimControllerDescriptor()) register(newLegacyServiceAccountTokenCleanerControllerDescriptor()) register(newValidatingAdmissionPolicyStatusControllerDescriptor()) + if utilfeature.DefaultFeatureGate.Enabled(features.SeparateTaintEvictionController) { + // register the controller only if the SeparateTaintEvictionController feature gate is enabled + register(newTaintEvictionControllerDescriptor()) + } for _, alias := range aliases.UnsortedList() { if _, ok := controllers[alias]; ok { diff --git a/cmd/kube-controller-manager/app/controllermanager_test.go b/cmd/kube-controller-manager/app/controllermanager_test.go index 8cd85d899ed..ceea94ae592 100644 --- a/cmd/kube-controller-manager/app/controllermanager_test.go +++ b/cmd/kube-controller-manager/app/controllermanager_test.go @@ -28,8 +28,9 @@ import ( cpnames "k8s.io/cloud-provider/names" "k8s.io/component-base/featuregate" featuregatetesting "k8s.io/component-base/featuregate/testing" - "k8s.io/kubernetes/cmd/kube-controller-manager/names" + "k8s.io/kubernetes/pkg/features" + "k8s.io/utils/strings/slices" ) func TestControllerNamesConsistency(t *testing.T) { @@ -73,6 +74,7 @@ func TestControllerNamesDeclaration(t *testing.T) { names.TokenCleanerController, names.NodeIpamController, names.NodeLifecycleController, + names.TaintEvictionController, cpnames.ServiceLBController, cpnames.NodeRouteController, cpnames.CloudNodeLifecycleController, @@ -156,3 +158,17 @@ func TestFeatureGatedControllersShouldNotDefineAliases(t *testing.T) { } } } + +// TestTaintEvictionControllerDeclaration ensures that it is possible to run the taint-manager as a separate controller +// only when the SeparateTaintEvictionController feature is enabled +func TestTaintEvictionControllerDeclaration(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SeparateTaintEvictionController, true)() + if !slices.Contains(KnownControllers(), names.TaintEvictionController) { + t.Errorf("TaintEvictionController should be a registered controller when the SeparateTaintEvictionController feature is enabled") + } + + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, 
features.SeparateTaintEvictionController, false)() + if slices.Contains(KnownControllers(), names.TaintEvictionController) { + t.Errorf("TaintEvictionController should not be a registered controller when the SeparateTaintEvictionController feature is disabled") + } +} diff --git a/cmd/kube-controller-manager/app/core.go b/cmd/kube-controller-manager/app/core.go index 9709e98955b..467487739ba 100644 --- a/cmd/kube-controller-manager/app/core.go +++ b/cmd/kube-controller-manager/app/core.go @@ -59,6 +59,7 @@ import ( resourcequotacontroller "k8s.io/kubernetes/pkg/controller/resourcequota" serviceaccountcontroller "k8s.io/kubernetes/pkg/controller/serviceaccount" "k8s.io/kubernetes/pkg/controller/storageversiongc" + "k8s.io/kubernetes/pkg/controller/tainteviction" ttlcontroller "k8s.io/kubernetes/pkg/controller/ttl" "k8s.io/kubernetes/pkg/controller/ttlafterfinished" "k8s.io/kubernetes/pkg/controller/volume/attachdetach" @@ -219,6 +220,32 @@ func startNodeLifecycleController(ctx context.Context, controllerContext Control return nil, true, nil } +func newTaintEvictionControllerDescriptor() *ControllerDescriptor { + return &ControllerDescriptor{ + name: names.TaintEvictionController, + initFunc: startTaintEvictionController, + requiredFeatureGates: []featuregate.Feature{ + features.SeparateTaintEvictionController, + }, + } +} + +func startTaintEvictionController(ctx context.Context, controllerContext ControllerContext, controllerName string) (controller.Interface, bool, error) { + taintEvictionController, err := tainteviction.New( + ctx, + // taint-manager uses existing cluster role from node-controller + controllerContext.ClientBuilder.ClientOrDie("node-controller"), + controllerContext.InformerFactory.Core().V1().Pods(), + controllerContext.InformerFactory.Core().V1().Nodes(), + controllerName, + ) + if err != nil { + return nil, false, err + } + go taintEvictionController.Run(ctx) + return nil, true, nil +} + func newCloudNodeLifecycleControllerDescriptor() *ControllerDescriptor { return &ControllerDescriptor{ name: cpnames.CloudNodeLifecycleController, diff --git a/cmd/kube-controller-manager/names/controller_names.go b/cmd/kube-controller-manager/names/controller_names.go index cfda67d775f..fed6d887a86 100644 --- a/cmd/kube-controller-manager/names/controller_names.go +++ b/cmd/kube-controller-manager/names/controller_names.go @@ -68,6 +68,7 @@ const ( TokenCleanerController = "token-cleaner-controller" NodeIpamController = "node-ipam-controller" NodeLifecycleController = "node-lifecycle-controller" + TaintEvictionController = "taint-eviction-controller" PersistentVolumeBinderController = "persistentvolume-binder-controller" PersistentVolumeAttachDetachController = "persistentvolume-attach-detach-controller" PersistentVolumeExpanderController = "persistentvolume-expander-controller" diff --git a/pkg/controller/nodelifecycle/node_lifecycle_controller.go b/pkg/controller/nodelifecycle/node_lifecycle_controller.go index 08dfd358226..f2cb62444a4 100644 --- a/pkg/controller/nodelifecycle/node_lifecycle_controller.go +++ b/pkg/controller/nodelifecycle/node_lifecycle_controller.go @@ -37,6 +37,7 @@ import ( "k8s.io/apimachinery/pkg/labels" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/wait" + utilfeature "k8s.io/apiserver/pkg/util/feature" appsv1informers "k8s.io/client-go/informers/apps/v1" coordinformers "k8s.io/client-go/informers/coordination/v1" coreinformers "k8s.io/client-go/informers/core/v1" @@ -54,7 +55,9 @@ import ( kubeletapis 
"k8s.io/kubelet/pkg/apis" "k8s.io/kubernetes/pkg/controller" "k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler" + "k8s.io/kubernetes/pkg/controller/tainteviction" controllerutil "k8s.io/kubernetes/pkg/controller/util/node" + "k8s.io/kubernetes/pkg/features" taintutils "k8s.io/kubernetes/pkg/util/taints" ) @@ -127,6 +130,13 @@ const ( // podUpdateWorkerSizes assumes that in most cases pod will be handled by monitorNodeHealth pass. // Pod update workers will only handle lagging cache pods. 4 workers should be enough. podUpdateWorkerSize = 4 + // nodeUpdateWorkerSize defines the size of workers for node update or/and pod update. + nodeUpdateWorkerSize = 8 + + // taintEvictionController is defined here in order to prevent imports of + // k8s.io/kubernetes/cmd/kube-controller-manager/names which would result in validation errors. + // This constant will be removed upon graduation of the SeparateTaintEvictionController feature. + taintEvictionController = "taint-eviction-controller" ) // labelReconcileInfo lists Node labels to reconcile, and how to reconcile them. @@ -207,7 +217,7 @@ type podUpdateItem struct { // Controller is the controller that manages node's life cycle. type Controller struct { - taintManager *scheduler.NoExecuteTaintManager + taintManager *tainteviction.Controller podLister corelisters.PodLister podInformerSynced cache.InformerSynced @@ -326,7 +336,7 @@ func NewNodeLifecycleController( nodeMonitorPeriod: nodeMonitorPeriod, nodeStartupGracePeriod: nodeStartupGracePeriod, nodeMonitorGracePeriod: nodeMonitorGracePeriod, - nodeUpdateWorkerSize: scheduler.UpdateWorkerSize, + nodeUpdateWorkerSize: nodeUpdateWorkerSize, zoneNoExecuteTainter: make(map[string]*scheduler.RateLimitedTimedQueue), nodesToRetry: sync.Map{}, zoneStates: make(map[string]ZoneState), @@ -346,17 +356,11 @@ func NewNodeLifecycleController( AddFunc: func(obj interface{}) { pod := obj.(*v1.Pod) nc.podUpdated(nil, pod) - if nc.taintManager != nil { - nc.taintManager.PodUpdated(nil, pod) - } }, UpdateFunc: func(prev, obj interface{}) { prevPod := prev.(*v1.Pod) newPod := obj.(*v1.Pod) nc.podUpdated(prevPod, newPod) - if nc.taintManager != nil { - nc.taintManager.PodUpdated(prevPod, newPod) - } }, DeleteFunc: func(obj interface{}) { pod, isPod := obj.(*v1.Pod) @@ -374,9 +378,6 @@ func NewNodeLifecycleController( } } nc.podUpdated(pod, nil) - if nc.taintManager != nil { - nc.taintManager.PodUpdated(pod, nil) - } }, }) nc.podInformerSynced = podInformer.Informer().HasSynced @@ -412,21 +413,14 @@ func NewNodeLifecycleController( nc.podLister = podInformer.Lister() nc.nodeLister = nodeInformer.Lister() - nc.taintManager = scheduler.NewNoExecuteTaintManager(ctx, kubeClient, nc.podLister, nc.nodeLister, nc.getPodsAssignedToNode) - nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ - AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error { - nc.taintManager.NodeUpdated(nil, node) - return nil - }), - UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(oldNode, newNode *v1.Node) error { - nc.taintManager.NodeUpdated(oldNode, newNode) - return nil - }), - DeleteFunc: controllerutil.CreateDeleteNodeHandler(logger, func(node *v1.Node) error { - nc.taintManager.NodeUpdated(node, nil) - return nil - }), - }) + if !utilfeature.DefaultFeatureGate.Enabled(features.SeparateTaintEvictionController) { + logger.Info("Running TaintEvictionController as part of NodeLifecyleController") + tm, err := tainteviction.New(ctx, kubeClient, podInformer, nodeInformer, taintEvictionController) + 
if err != nil { + return nil, err + } + nc.taintManager = tm + } logger.Info("Controller will reconcile labels") nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ @@ -480,10 +474,13 @@ func (nc *Controller) Run(ctx context.Context) { return } - go nc.taintManager.Run(ctx) + if !utilfeature.DefaultFeatureGate.Enabled(features.SeparateTaintEvictionController) { + logger.Info("Starting", "controller", taintEvictionController) + go nc.taintManager.Run(ctx) + } // Start workers to reconcile labels and/or update NoSchedule taint for nodes. - for i := 0; i < scheduler.UpdateWorkerSize; i++ { + for i := 0; i < nodeUpdateWorkerSize; i++ { // Thanks to "workqueue", each worker just need to get item from queue, because // the item is flagged when got from queue: if new event come, the new item will // be re-queued until "Done", so no more than one worker handle the same item and diff --git a/pkg/controller/tainteviction/OWNERS b/pkg/controller/tainteviction/OWNERS new file mode 100644 index 00000000000..19c02ee0b10 --- /dev/null +++ b/pkg/controller/tainteviction/OWNERS @@ -0,0 +1,8 @@ +# See the OWNERS docs at https://go.k8s.io/owners + +approvers: + - sig-scheduling-maintainers +reviewers: + - sig-scheduling +labels: + - sig/scheduling diff --git a/pkg/controller/tainteviction/doc.go b/pkg/controller/tainteviction/doc.go new file mode 100644 index 00000000000..aac6640aa28 --- /dev/null +++ b/pkg/controller/tainteviction/doc.go @@ -0,0 +1,19 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package tainteviction contains the logic implementing taint-based eviction +// for Pods running on Nodes with NoExecute taints. +package tainteviction diff --git a/pkg/controller/tainteviction/metrics/metrics.go b/pkg/controller/tainteviction/metrics/metrics.go new file mode 100644 index 00000000000..600c22c81e7 --- /dev/null +++ b/pkg/controller/tainteviction/metrics/metrics.go @@ -0,0 +1,60 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "sync" + + "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" +) + +const taintEvictionControllerSubsystem = "taint_eviction_controller" + +var ( + // PodDeletionsTotal counts the number of Pods deleted by TaintEvictionController since its start. 
+ PodDeletionsTotal = metrics.NewCounter( + &metrics.CounterOpts{ + Subsystem: taintEvictionControllerSubsystem, + Name: "pod_deletions_total", + Help: "Total number of Pods deleted by TaintEvictionController since its start.", + StabilityLevel: metrics.ALPHA, + }, + ) + + // PodDeletionsLatency tracks the latency, in seconds, between the time when a taint effect has been activated + // for the Pod and its deletion. + PodDeletionsLatency = metrics.NewHistogram( + &metrics.HistogramOpts{ + Subsystem: taintEvictionControllerSubsystem, + Name: "pod_deletion_duration_seconds", + Help: "Latency, in seconds, between the time when a taint effect has been activated for the Pod and its deletion via TaintEvictionController.", + Buckets: []float64{0.005, 0.025, 0.1, 0.5, 1, 2.5, 10, 30, 60, 120, 180, 240}, // 5ms to 4m + StabilityLevel: metrics.ALPHA, + }, + ) +) + +var registerMetrics sync.Once + +// Register registers TaintEvictionController metrics. +func Register() { + registerMetrics.Do(func() { + legacyregistry.MustRegister(PodDeletionsTotal) + legacyregistry.MustRegister(PodDeletionsLatency) + }) +} diff --git a/pkg/controller/nodelifecycle/scheduler/taint_manager.go b/pkg/controller/tainteviction/taint_eviction.go similarity index 75% rename from pkg/controller/nodelifecycle/scheduler/taint_manager.go rename to pkg/controller/tainteviction/taint_eviction.go index 65e669e45b5..f09ed443b4d 100644 --- a/pkg/controller/nodelifecycle/scheduler/taint_manager.go +++ b/pkg/controller/tainteviction/taint_eviction.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package scheduler +package tainteviction import ( "context" @@ -31,19 +31,22 @@ import ( "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apiserver/pkg/util/feature" + corev1informers "k8s.io/client-go/informers/core/v1" clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" v1core "k8s.io/client-go/kubernetes/typed/core/v1" corelisters "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" apipod "k8s.io/kubernetes/pkg/api/v1/pod" "k8s.io/kubernetes/pkg/apis/core/helper" v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" + "k8s.io/kubernetes/pkg/controller/tainteviction/metrics" + controllerutil "k8s.io/kubernetes/pkg/controller/util/node" "k8s.io/kubernetes/pkg/features" utilpod "k8s.io/kubernetes/pkg/util/pod" - - "k8s.io/klog/v2" ) const ( @@ -77,14 +80,18 @@ func hash(val string, max int) int { // GetPodsByNodeNameFunc returns the list of pods assigned to the specified node. type GetPodsByNodeNameFunc func(nodeName string) ([]*v1.Pod, error) -// NoExecuteTaintManager listens to Taint/Toleration changes and is responsible for removing Pods +// Controller listens to Taint/Toleration changes and is responsible for removing Pods // from Nodes tainted with NoExecute Taints. 
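Review note on the metrics package just added: Register is guarded by sync.Once, so calling it from every constructor is safe, and histogram samples must be recorded in seconds. A hedged sketch of the intended usage with stand-in variable names (not the PR's code):

```go
package main

import (
	"fmt"
	"time"

	"k8s.io/component-base/metrics"
	"k8s.io/component-base/metrics/legacyregistry"
)

// Stand-ins for PodDeletionsTotal / PodDeletionsLatency above.
var (
	deletions = metrics.NewCounter(&metrics.CounterOpts{
		Subsystem:      "taint_eviction_controller",
		Name:           "pod_deletions_total",
		Help:           "Total number of Pods deleted since the controller started.",
		StabilityLevel: metrics.ALPHA,
	})
	latency = metrics.NewHistogram(&metrics.HistogramOpts{
		Subsystem:      "taint_eviction_controller",
		Name:           "pod_deletion_duration_seconds",
		Help:           "Seconds between taint activation and Pod deletion.",
		Buckets:        []float64{0.005, 0.025, 0.1, 0.5, 1, 2.5, 10, 30, 60, 120, 180, 240},
		StabilityLevel: metrics.ALPHA,
	})
)

func main() {
	legacyregistry.MustRegister(deletions, latency)

	fireAt := time.Now().Add(-1500 * time.Millisecond) // pretend eviction was scheduled 1.5s ago
	deletions.Inc()
	// Duration.Seconds() yields float64 seconds; observing a raw Duration
	// (or a Duration multiplied by time.Second) would record garbage.
	latency.Observe(time.Since(fireAt).Seconds())
	fmt.Println("recorded one deletion")
}
```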
-type NoExecuteTaintManager struct { +type Controller struct { + name string + client clientset.Interface broadcaster record.EventBroadcaster recorder record.EventRecorder podLister corelisters.PodLister + podListerSynced cache.InformerSynced nodeLister corelisters.NodeLister + nodeListerSynced cache.InformerSynced getPodsAssignedToNode GetPodsByNodeNameFunc taintEvictionQueue *TimedWorkerQueue @@ -99,11 +106,11 @@ type NoExecuteTaintManager struct { podUpdateQueue workqueue.Interface } -func deletePodHandler(c clientset.Interface, emitEventFunc func(types.NamespacedName)) func(ctx context.Context, args *WorkArgs) error { - return func(ctx context.Context, args *WorkArgs) error { +func deletePodHandler(c clientset.Interface, emitEventFunc func(types.NamespacedName), controllerName string) func(ctx context.Context, fireAt time.Time, args *WorkArgs) error { + return func(ctx context.Context, fireAt time.Time, args *WorkArgs) error { ns := args.NamespacedName.Namespace name := args.NamespacedName.Name - klog.FromContext(ctx).Info("NoExecuteTaintManager is deleting pod", "pod", args.NamespacedName.String()) + klog.FromContext(ctx).Info("Deleting pod", "controller", controllerName, "pod", args.NamespacedName) if emitEventFunc != nil { emitEventFunc(args.NamespacedName) } @@ -111,6 +118,8 @@ func deletePodHandler(c clientset.Interface, emitEventFunc func(types.Namespaced for i := 0; i < retries; i++ { err = addConditionAndDeletePod(ctx, c, name, ns) if err == nil { + metrics.PodDeletionsTotal.Inc() + metrics.PodDeletionsLatency.Observe(time.Since(fireAt).Seconds()) break } time.Sleep(10 * time.Millisecond) } @@ -175,34 +184,106 @@ func getMinTolerationTime(tolerations []v1.Toleration) time.Duration { return time.Duration(minTolerationTime) * time.Second } -// NewNoExecuteTaintManager creates a new NoExecuteTaintManager that will use passed clientset to -// communicate with the API server. -func NewNoExecuteTaintManager(ctx context.Context, c clientset.Interface, podLister corelisters.PodLister, nodeLister corelisters.NodeLister, getPodsAssignedToNode GetPodsByNodeNameFunc) *NoExecuteTaintManager { +// New creates a new Controller that will use the passed clientset to communicate with the API server. 
+func New(ctx context.Context, c clientset.Interface, podInformer corev1informers.PodInformer, nodeInformer corev1informers.NodeInformer, controllerName string) (*Controller, error) { + logger := klog.FromContext(ctx) + metrics.Register() eventBroadcaster := record.NewBroadcaster() - recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "taint-controller"}) + recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: controllerName}) - tm := &NoExecuteTaintManager{ - client: c, - broadcaster: eventBroadcaster, - recorder: recorder, - podLister: podLister, - nodeLister: nodeLister, - getPodsAssignedToNode: getPodsAssignedToNode, - taintedNodes: make(map[string][]v1.Taint), + podIndexer := podInformer.Informer().GetIndexer() - nodeUpdateQueue: workqueue.NewNamed("noexec_taint_node"), - podUpdateQueue: workqueue.NewNamed("noexec_taint_pod"), + tm := &Controller{ + name: controllerName, + + client: c, + broadcaster: eventBroadcaster, + recorder: recorder, + podLister: podInformer.Lister(), + podListerSynced: podInformer.Informer().HasSynced, + nodeLister: nodeInformer.Lister(), + nodeListerSynced: nodeInformer.Informer().HasSynced, + getPodsAssignedToNode: func(nodeName string) ([]*v1.Pod, error) { + objs, err := podIndexer.ByIndex("spec.nodeName", nodeName) + if err != nil { + return nil, err + } + pods := make([]*v1.Pod, 0, len(objs)) + for _, obj := range objs { + pod, ok := obj.(*v1.Pod) + if !ok { + continue + } + pods = append(pods, pod) + } + return pods, nil + }, + taintedNodes: make(map[string][]v1.Taint), + + nodeUpdateQueue: workqueue.NewWithConfig(workqueue.QueueConfig{Name: "noexec_taint_node"}), + podUpdateQueue: workqueue.NewWithConfig(workqueue.QueueConfig{Name: "noexec_taint_pod"}), } - tm.taintEvictionQueue = CreateWorkerQueue(deletePodHandler(c, tm.emitPodDeletionEvent)) + tm.taintEvictionQueue = CreateWorkerQueue(deletePodHandler(c, tm.emitPodDeletionEvent, tm.name)) - return tm + _, err := podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + pod := obj.(*v1.Pod) + tm.PodUpdated(nil, pod) + }, + UpdateFunc: func(prev, obj interface{}) { + prevPod := prev.(*v1.Pod) + newPod := obj.(*v1.Pod) + tm.PodUpdated(prevPod, newPod) + }, + DeleteFunc: func(obj interface{}) { + pod, isPod := obj.(*v1.Pod) + // We can get DeletedFinalStateUnknown instead of *v1.Pod here and we need to handle that correctly. 
+ if !isPod { + deletedState, ok := obj.(cache.DeletedFinalStateUnknown) + if !ok { + logger.Error(nil, "Received unexpected object", "object", obj) + return + } + pod, ok = deletedState.Obj.(*v1.Pod) + if !ok { + logger.Error(nil, "DeletedFinalStateUnknown contained non-Pod object", "object", deletedState.Obj) + return + } + } + tm.PodUpdated(pod, nil) + }, + }) + if err != nil { + return nil, fmt.Errorf("unable to add pod event handler: %w", err) + } + + _, err = nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error { + tm.NodeUpdated(nil, node) + return nil + }), + UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(oldNode, newNode *v1.Node) error { + tm.NodeUpdated(oldNode, newNode) + return nil + }), + DeleteFunc: controllerutil.CreateDeleteNodeHandler(logger, func(node *v1.Node) error { + tm.NodeUpdated(node, nil) + return nil + }), + }) + if err != nil { + return nil, fmt.Errorf("unable to add node event handler: %w", err) + } + + return tm, nil } -// Run starts NoExecuteTaintManager which will run in loop until `stopCh` is closed. -func (tc *NoExecuteTaintManager) Run(ctx context.Context) { +// Run starts the controller which will run in a loop until the context is cancelled. +func (tc *Controller) Run(ctx context.Context) { defer utilruntime.HandleCrash() logger := klog.FromContext(ctx) - logger.Info("Starting NoExecuteTaintManager") + logger.Info("Starting", "controller", tc.name) + defer logger.Info("Shutting down controller", "controller", tc.name) // Start events processing pipeline. tc.broadcaster.StartStructuredLogging(0) @@ -210,14 +291,18 @@ func (tc *NoExecuteTaintManager) Run(ctx context.Context) { logger.Info("Sending events to api server") tc.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: tc.client.CoreV1().Events("")}) } else { - logger.Error(nil, "kubeClient is nil when starting NodeController") + logger.Error(nil, "kubeClient is nil", "controller", tc.name) klog.FlushAndExit(klog.ExitFlushTimeout, 1) } defer tc.broadcaster.Shutdown() - defer tc.nodeUpdateQueue.ShutDown() defer tc.podUpdateQueue.ShutDown() + // wait for the cache to be synced + if !cache.WaitForNamedCacheSync(tc.name, ctx.Done(), tc.podListerSynced, tc.nodeListerSynced) { + return + } + for i := 0; i < UpdateWorkerSize; i++ { tc.nodeUpdateChannels = append(tc.nodeUpdateChannels, make(chan nodeUpdateItem, NodeUpdateChannelSize)) tc.podUpdateChannels = append(tc.podUpdateChannels, make(chan podUpdateItem, podUpdateChannelSize)) @@ -273,11 +358,11 @@ func (tc *NoExecuteTaintManager) Run(ctx context.Context) { wg.Wait() } -func (tc *NoExecuteTaintManager) worker(ctx context.Context, worker int, done func(), stopCh <-chan struct{}) { +func (tc *Controller) worker(ctx context.Context, worker int, done func(), stopCh <-chan struct{}) { defer done() // When processing events we want to prioritize Node updates over Pod updates, - // as NodeUpdates that interest NoExecuteTaintManager should be handled as soon as possible - + // as NodeUpdates that interest the controller should be handled as soon as possible - // we don't want user (or system) to wait until PodUpdate queue is drained before it can // start evicting Pods from tainted Nodes. for { @@ -307,7 +392,7 @@ func (tc *NoExecuteTaintManager) worker(ctx context.Context, worker int, done fu } // PodUpdated is used to notify NoExecuteTaintManager about Pod changes. 
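Review note: New() above serves getPodsAssignedToNode from podIndexer.ByIndex("spec.nodeName", ...), which assumes an index under that key is registered on the shared pod informer (in kube-controller-manager the node-lifecycle controller registers one on the same informer factory). A sketch of registering such an index; the helper name is mine, not the PR's:

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes/fake"
	"k8s.io/client-go/tools/cache"
)

// addNodeNameIndex registers a "spec.nodeName" index so ByIndex can answer
// "which pods sit on this node" straight from the local cache.
func addNodeNameIndex(podInformer cache.SharedIndexInformer) error {
	return podInformer.AddIndexers(cache.Indexers{
		"spec.nodeName": func(obj interface{}) ([]string, error) {
			pod, ok := obj.(*v1.Pod)
			if !ok {
				return nil, nil
			}
			return []string{pod.Spec.NodeName}, nil
		},
	})
}

func main() {
	factory := informers.NewSharedInformerFactory(fake.NewSimpleClientset(), 0)
	informer := factory.Core().V1().Pods().Informer()
	if err := addNodeNameIndex(informer); err != nil { // must happen before the informer starts
		panic(err)
	}
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "pod1", Namespace: "default"},
		Spec:       v1.PodSpec{NodeName: "node1"},
	}
	_ = informer.GetIndexer().Add(pod)

	objs, _ := informer.GetIndexer().ByIndex("spec.nodeName", "node1")
	fmt.Println("pods on node1:", len(objs))
}
```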
-func (tc *NoExecuteTaintManager) PodUpdated(oldPod *v1.Pod, newPod *v1.Pod) { +func (tc *Controller) PodUpdated(oldPod *v1.Pod, newPod *v1.Pod) { podName := "" podNamespace := "" nodeName := "" @@ -339,7 +424,7 @@ func (tc *NoExecuteTaintManager) PodUpdated(oldPod *v1.Pod, newPod *v1.Pod) { } // NodeUpdated is used to notify NoExecuteTaintManager about Node changes. -func (tc *NoExecuteTaintManager) NodeUpdated(oldNode *v1.Node, newNode *v1.Node) { +func (tc *Controller) NodeUpdated(oldNode *v1.Node, newNode *v1.Node) { nodeName := "" oldTaints := []v1.Taint{} if oldNode != nil { @@ -363,13 +448,13 @@ func (tc *NoExecuteTaintManager) NodeUpdated(oldNode *v1.Node, newNode *v1.Node) tc.nodeUpdateQueue.Add(updateItem) } -func (tc *NoExecuteTaintManager) cancelWorkWithEvent(logger klog.Logger, nsName types.NamespacedName) { +func (tc *Controller) cancelWorkWithEvent(logger klog.Logger, nsName types.NamespacedName) { if tc.taintEvictionQueue.CancelWork(logger, nsName.String()) { tc.emitCancelPodDeletionEvent(nsName) } } -func (tc *NoExecuteTaintManager) processPodOnNode( +func (tc *Controller) processPodOnNode( ctx context.Context, podNamespacedName types.NamespacedName, nodeName string, @@ -410,7 +495,7 @@ func (tc *NoExecuteTaintManager) processPodOnNode( tc.taintEvictionQueue.AddWork(ctx, NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), startTime, triggerTime) } -func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate podUpdateItem) { +func (tc *Controller) handlePodUpdate(ctx context.Context, podUpdate podUpdateItem) { pod, err := tc.podLister.Pods(podUpdate.podNamespace).Get(podUpdate.podName) logger := klog.FromContext(ctx) if err != nil { @@ -451,7 +536,7 @@ func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate tc.processPodOnNode(ctx, podNamespacedName, nodeName, pod.Spec.Tolerations, taints, time.Now()) } -func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdate nodeUpdateItem) { +func (tc *Controller) handleNodeUpdate(ctx context.Context, nodeUpdate nodeUpdateItem) { node, err := tc.nodeLister.Get(nodeUpdate.nodeName) logger := klog.FromContext(ctx) if err != nil { @@ -508,7 +593,7 @@ func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdat } } -func (tc *NoExecuteTaintManager) emitPodDeletionEvent(nsName types.NamespacedName) { +func (tc *Controller) emitPodDeletionEvent(nsName types.NamespacedName) { if tc.recorder == nil { return } @@ -521,7 +606,7 @@ func (tc *NoExecuteTaintManager) emitPodDeletionEvent(nsName types.NamespacedNam tc.recorder.Eventf(ref, v1.EventTypeNormal, "TaintManagerEviction", "Marking for deletion Pod %s", nsName.String()) } -func (tc *NoExecuteTaintManager) emitCancelPodDeletionEvent(nsName types.NamespacedName) { +func (tc *Controller) emitCancelPodDeletionEvent(nsName types.NamespacedName) { if tc.recorder == nil { return } diff --git a/pkg/controller/nodelifecycle/scheduler/taint_manager_test.go b/pkg/controller/tainteviction/taint_eviction_test.go similarity index 82% rename from pkg/controller/nodelifecycle/scheduler/taint_manager_test.go rename to pkg/controller/tainteviction/taint_eviction_test.go index eb76101773b..ea1b3b6794d 100644 --- a/pkg/controller/nodelifecycle/scheduler/taint_manager_test.go +++ b/pkg/controller/tainteviction/taint_eviction_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package scheduler +package tainteviction import ( "context" @@ -25,7 +25,7 @@ import ( "github.com/google/go-cmp/cmp" - v1 "k8s.io/api/core/v1" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" @@ -44,16 +44,16 @@ import ( var timeForControllerToProgressForSanityCheck = 20 * time.Millisecond func getPodsAssignedToNode(ctx context.Context, c *fake.Clientset) GetPodsByNodeNameFunc { - return func(nodeName string) ([]*v1.Pod, error) { + return func(nodeName string) ([]*corev1.Pod, error) { selector := fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName}) - pods, err := c.CoreV1().Pods(v1.NamespaceAll).List(ctx, metav1.ListOptions{ + pods, err := c.CoreV1().Pods(corev1.NamespaceAll).List(ctx, metav1.ListOptions{ FieldSelector: selector.String(), LabelSelector: labels.Everything().String(), }) if err != nil { - return []*v1.Pod{}, fmt.Errorf("failed to get Pods assigned to node %v", nodeName) + return []*corev1.Pod{}, fmt.Errorf("failed to get Pods assigned to node %v", nodeName) } - rPods := make([]*v1.Pod, len(pods.Items)) + rPods := make([]*corev1.Pod, len(pods.Items)) for i := range pods.Items { rPods[i] = &pods.Items[i] } @@ -61,31 +61,31 @@ func getPodsAssignedToNode(ctx context.Context, c *fake.Clientset) GetPodsByNode } } -func createNoExecuteTaint(index int) v1.Taint { +func createNoExecuteTaint(index int) corev1.Taint { now := metav1.Now() - return v1.Taint{ + return corev1.Taint{ Key: "testTaint" + fmt.Sprintf("%v", index), Value: "test" + fmt.Sprintf("%v", index), - Effect: v1.TaintEffectNoExecute, + Effect: corev1.TaintEffectNoExecute, TimeAdded: &now, } } -func addToleration(pod *v1.Pod, index int, duration int64) *v1.Pod { +func addToleration(pod *corev1.Pod, index int, duration int64) *corev1.Pod { if pod.Annotations == nil { pod.Annotations = map[string]string{} } if duration < 0 { - pod.Spec.Tolerations = []v1.Toleration{{Key: "testTaint" + fmt.Sprintf("%v", index), Value: "test" + fmt.Sprintf("%v", index), Effect: v1.TaintEffectNoExecute}} + pod.Spec.Tolerations = []corev1.Toleration{{Key: "testTaint" + fmt.Sprintf("%v", index), Value: "test" + fmt.Sprintf("%v", index), Effect: corev1.TaintEffectNoExecute}} } else { - pod.Spec.Tolerations = []v1.Toleration{{Key: "testTaint" + fmt.Sprintf("%v", index), Value: "test" + fmt.Sprintf("%v", index), Effect: v1.TaintEffectNoExecute, TolerationSeconds: &duration}} + pod.Spec.Tolerations = []corev1.Toleration{{Key: "testTaint" + fmt.Sprintf("%v", index), Value: "test" + fmt.Sprintf("%v", index), Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &duration}} } return pod } -func addTaintsToNode(node *v1.Node, key, value string, indices []int) *v1.Node { - taints := []v1.Taint{} +func addTaintsToNode(node *corev1.Node, key, value string, indices []int) *corev1.Node { + taints := []corev1.Taint{} for _, index := range indices { taints = append(taints, createNoExecuteTaint(index)) } @@ -93,11 +93,16 @@ func addTaintsToNode(node *v1.Node, key, value string, indices []int) *v1.Node { return node } -func setupNewNoExecuteTaintManager(ctx context.Context, fakeClientSet *fake.Clientset) (*NoExecuteTaintManager, cache.Indexer, cache.Indexer) { +var alwaysReady = func() bool { return true } + +func setupNewController(ctx context.Context, fakeClientSet *fake.Clientset) (*Controller, cache.Indexer, cache.Indexer) { informerFactory := informers.NewSharedInformerFactory(fakeClientSet, 0) podIndexer := 
informerFactory.Core().V1().Pods().Informer().GetIndexer() nodeIndexer := informerFactory.Core().V1().Nodes().Informer().GetIndexer() - mgr := NewNoExecuteTaintManager(ctx, fakeClientSet, informerFactory.Core().V1().Pods().Lister(), informerFactory.Core().V1().Nodes().Lister(), getPodsAssignedToNode(ctx, fakeClientSet)) + mgr, _ := New(ctx, fakeClientSet, informerFactory.Core().V1().Pods(), informerFactory.Core().V1().Nodes(), "taint-eviction-controller") + mgr.podListerSynced = alwaysReady + mgr.nodeListerSynced = alwaysReady + mgr.getPodsAssignedToNode = getPodsAssignedToNode(ctx, fakeClientSet) return mgr, podIndexer, nodeIndexer } @@ -113,16 +118,16 @@ func (a durationSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a durationSlice) Less(i, j int) bool { return a[i].timestamp < a[j].timestamp } func TestFilterNoExecuteTaints(t *testing.T) { - taints := []v1.Taint{ + taints := []corev1.Taint{ { Key: "one", Value: "one", - Effect: v1.TaintEffectNoExecute, + Effect: corev1.TaintEffectNoExecute, }, { Key: "two", Value: "two", - Effect: v1.TaintEffectNoSchedule, + Effect: corev1.TaintEffectNoSchedule, }, } taints = getNoExecuteTaints(taints) @@ -134,8 +139,8 @@ func TestFilterNoExecuteTaints(t *testing.T) { func TestCreatePod(t *testing.T) { testCases := []struct { description string - pod *v1.Pod - taintedNodes map[string][]v1.Taint + pod *corev1.Pod + taintedNodes map[string][]corev1.Taint expectPatch bool expectDelete bool enablePodDisruptionConditions bool @@ -143,19 +148,19 @@ func TestCreatePod(t *testing.T) { { description: "not scheduled - ignore", pod: testutil.NewPod("pod1", ""), - taintedNodes: map[string][]v1.Taint{}, + taintedNodes: map[string][]corev1.Taint{}, expectDelete: false, }, { description: "scheduled on untainted Node", pod: testutil.NewPod("pod1", "node1"), - taintedNodes: map[string][]v1.Taint{}, + taintedNodes: map[string][]corev1.Taint{}, expectDelete: false, }, { description: "schedule on tainted Node", pod: testutil.NewPod("pod1", "node1"), - taintedNodes: map[string][]v1.Taint{ + taintedNodes: map[string][]corev1.Taint{ "node1": {createNoExecuteTaint(1)}, }, expectDelete: true, @@ -163,7 +168,7 @@ func TestCreatePod(t *testing.T) { { description: "schedule on tainted Node; PodDisruptionConditions enabled", pod: testutil.NewPod("pod1", "node1"), - taintedNodes: map[string][]v1.Taint{ + taintedNodes: map[string][]corev1.Taint{ "node1": {createNoExecuteTaint(1)}, }, expectPatch: true, @@ -173,7 +178,7 @@ func TestCreatePod(t *testing.T) { { description: "schedule on tainted Node with finite toleration", pod: addToleration(testutil.NewPod("pod1", "node1"), 1, 100), - taintedNodes: map[string][]v1.Taint{ + taintedNodes: map[string][]corev1.Taint{ "node1": {createNoExecuteTaint(1)}, }, expectDelete: false, @@ -181,7 +186,7 @@ func TestCreatePod(t *testing.T) { { description: "schedule on tainted Node with infinite toleration", pod: addToleration(testutil.NewPod("pod1", "node1"), 1, -1), - taintedNodes: map[string][]v1.Taint{ + taintedNodes: map[string][]corev1.Taint{ "node1": {createNoExecuteTaint(1)}, }, expectDelete: false, @@ -189,7 +194,7 @@ func TestCreatePod(t *testing.T) { { description: "schedule on tainted Node with infinite ivalid toleration", pod: addToleration(testutil.NewPod("pod1", "node1"), 2, -1), - taintedNodes: map[string][]v1.Taint{ + taintedNodes: map[string][]corev1.Taint{ "node1": {createNoExecuteTaint(1)}, }, expectDelete: true, @@ -200,8 +205,8 @@ func TestCreatePod(t *testing.T) { t.Run(item.description, func(t *testing.T) { defer 
featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, item.enablePodDisruptionConditions)() ctx, cancel := context.WithCancel(context.Background()) - fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*item.pod}}) - controller, podIndexer, _ := setupNewNoExecuteTaintManager(ctx, fakeClientset) + fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: []corev1.Pod{*item.pod}}) + controller, podIndexer, _ := setupNewController(ctx, fakeClientset) controller.recorder = testutil.NewFakeRecorder() go controller.Run(ctx) controller.taintedNodes = item.taintedNodes @@ -221,10 +226,10 @@ func TestDeletePod(t *testing.T) { defer cancel() fakeClientset := fake.NewSimpleClientset() - controller, _, _ := setupNewNoExecuteTaintManager(ctx, fakeClientset) + controller, _, _ := setupNewController(ctx, fakeClientset) controller.recorder = testutil.NewFakeRecorder() go controller.Run(ctx) - controller.taintedNodes = map[string][]v1.Taint{ + controller.taintedNodes = map[string][]corev1.Taint{ "node1": {createNoExecuteTaint(1)}, } controller.PodUpdated(testutil.NewPod("pod1", "node1"), nil) @@ -235,10 +240,10 @@ func TestDeletePod(t *testing.T) { func TestUpdatePod(t *testing.T) { testCases := []struct { description string - prevPod *v1.Pod + prevPod *corev1.Pod awaitForScheduledEviction bool - newPod *v1.Pod - taintedNodes map[string][]v1.Taint + newPod *corev1.Pod + taintedNodes map[string][]corev1.Taint expectPatch bool expectDelete bool enablePodDisruptionConditions bool @@ -247,7 +252,7 @@ func TestUpdatePod(t *testing.T) { description: "scheduling onto tainted Node results in patch and delete when PodDisruptionConditions enabled", prevPod: testutil.NewPod("pod1", ""), newPod: testutil.NewPod("pod1", "node1"), - taintedNodes: map[string][]v1.Taint{ + taintedNodes: map[string][]corev1.Taint{ "node1": {createNoExecuteTaint(1)}, }, expectPatch: true, @@ -258,7 +263,7 @@ func TestUpdatePod(t *testing.T) { description: "scheduling onto tainted Node", prevPod: testutil.NewPod("pod1", ""), newPod: testutil.NewPod("pod1", "node1"), - taintedNodes: map[string][]v1.Taint{ + taintedNodes: map[string][]corev1.Taint{ "node1": {createNoExecuteTaint(1)}, }, expectDelete: true, @@ -267,7 +272,7 @@ func TestUpdatePod(t *testing.T) { description: "scheduling onto tainted Node with toleration", prevPod: addToleration(testutil.NewPod("pod1", ""), 1, -1), newPod: addToleration(testutil.NewPod("pod1", "node1"), 1, -1), - taintedNodes: map[string][]v1.Taint{ + taintedNodes: map[string][]corev1.Taint{ "node1": {createNoExecuteTaint(1)}, }, expectDelete: false, @@ -277,7 +282,7 @@ func TestUpdatePod(t *testing.T) { prevPod: addToleration(testutil.NewPod("pod1", "node1"), 1, 100), newPod: testutil.NewPod("pod1", "node1"), awaitForScheduledEviction: true, - taintedNodes: map[string][]v1.Taint{ + taintedNodes: map[string][]corev1.Taint{ "node1": {createNoExecuteTaint(1)}, }, expectDelete: true, @@ -287,7 +292,7 @@ func TestUpdatePod(t *testing.T) { prevPod: addToleration(testutil.NewPod("pod1", "node1"), 1, 1), newPod: addToleration(testutil.NewPod("pod1", "node1"), 1, 100), awaitForScheduledEviction: true, - taintedNodes: map[string][]v1.Taint{ + taintedNodes: map[string][]corev1.Taint{ "node1": {createNoExecuteTaint(1)}, }, expectDelete: true, @@ -298,8 +303,8 @@ func TestUpdatePod(t *testing.T) { t.Run(item.description, func(t *testing.T) { defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, 
features.PodDisruptionConditions, item.enablePodDisruptionConditions)() ctx, cancel := context.WithCancel(context.Background()) - fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*item.prevPod}}) - controller, podIndexer, _ := setupNewNoExecuteTaintManager(context.TODO(), fakeClientset) + fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: []corev1.Pod{*item.prevPod}}) + controller, podIndexer, _ := setupNewController(context.TODO(), fakeClientset) controller.recorder = testutil.NewFakeRecorder() controller.taintedNodes = item.taintedNodes go controller.Run(ctx) @@ -330,14 +335,14 @@ func TestUpdatePod(t *testing.T) { func TestCreateNode(t *testing.T) { testCases := []struct { description string - pods []v1.Pod - node *v1.Node + pods []corev1.Pod + node *corev1.Node expectPatch bool expectDelete bool }{ { description: "Creating Node matching already assigned Pod", - pods: []v1.Pod{ + pods: []corev1.Pod{ *testutil.NewPod("pod1", "node1"), }, node: testutil.NewNode("node1"), @@ -346,7 +351,7 @@ func TestCreateNode(t *testing.T) { }, { description: "Creating tainted Node matching already assigned Pod", - pods: []v1.Pod{ + pods: []corev1.Pod{ *testutil.NewPod("pod1", "node1"), }, node: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), @@ -355,7 +360,7 @@ func TestCreateNode(t *testing.T) { }, { description: "Creating tainted Node matching already assigned tolerating Pod", - pods: []v1.Pod{ + pods: []corev1.Pod{ *addToleration(testutil.NewPod("pod1", "node1"), 1, -1), }, node: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), @@ -366,8 +371,8 @@ func TestCreateNode(t *testing.T) { for _, item := range testCases { ctx, cancel := context.WithCancel(context.Background()) - fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods}) - controller, _, nodeIndexer := setupNewNoExecuteTaintManager(ctx, fakeClientset) + fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods}) + controller, _, nodeIndexer := setupNewController(ctx, fakeClientset) nodeIndexer.Add(item.node) controller.recorder = testutil.NewFakeRecorder() go controller.Run(ctx) @@ -382,9 +387,9 @@ func TestCreateNode(t *testing.T) { func TestDeleteNode(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) fakeClientset := fake.NewSimpleClientset() - controller, _, _ := setupNewNoExecuteTaintManager(ctx, fakeClientset) + controller, _, _ := setupNewController(ctx, fakeClientset) controller.recorder = testutil.NewFakeRecorder() - controller.taintedNodes = map[string][]v1.Taint{ + controller.taintedNodes = map[string][]corev1.Taint{ "node1": {createNoExecuteTaint(1)}, } go controller.Run(ctx) @@ -406,9 +411,9 @@ func TestDeleteNode(t *testing.T) { func TestUpdateNode(t *testing.T) { testCases := []struct { description string - pods []v1.Pod - oldNode *v1.Node - newNode *v1.Node + pods []corev1.Pod + oldNode *corev1.Node + newNode *corev1.Node expectPatch bool expectDelete bool additionalSleep time.Duration @@ -416,7 +421,7 @@ func TestUpdateNode(t *testing.T) { }{ { description: "Added taint, expect node patched and deleted when PodDisruptionConditions is enabled", - pods: []v1.Pod{ + pods: []corev1.Pod{ *testutil.NewPod("pod1", "node1"), }, oldNode: testutil.NewNode("node1"), @@ -427,7 +432,7 @@ func TestUpdateNode(t *testing.T) { }, { description: "Added taint", - pods: []v1.Pod{ + pods: []corev1.Pod{ *testutil.NewPod("pod1", "node1"), }, oldNode: testutil.NewNode("node1"), @@ -436,7 +441,7 @@ func 
TestUpdateNode(t *testing.T) { }, { description: "Added tolerated taint", - pods: []v1.Pod{ + pods: []corev1.Pod{ *addToleration(testutil.NewPod("pod1", "node1"), 1, 100), }, oldNode: testutil.NewNode("node1"), @@ -445,7 +450,7 @@ func TestUpdateNode(t *testing.T) { }, { description: "Only one added taint tolerated", - pods: []v1.Pod{ + pods: []corev1.Pod{ *addToleration(testutil.NewPod("pod1", "node1"), 1, 100), }, oldNode: testutil.NewNode("node1"), @@ -454,7 +459,7 @@ func TestUpdateNode(t *testing.T) { }, { description: "Taint removed", - pods: []v1.Pod{ + pods: []corev1.Pod{ *addToleration(testutil.NewPod("pod1", "node1"), 1, 1), }, oldNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), @@ -464,24 +469,24 @@ func TestUpdateNode(t *testing.T) { }, { description: "Pod with multiple tolerations are evicted when first one runs out", - pods: []v1.Pod{ + pods: []corev1.Pod{ { ObjectMeta: metav1.ObjectMeta{ Namespace: "default", Name: "pod1", }, - Spec: v1.PodSpec{ + Spec: corev1.PodSpec{ NodeName: "node1", - Tolerations: []v1.Toleration{ - {Key: "testTaint1", Value: "test1", Effect: v1.TaintEffectNoExecute, TolerationSeconds: &[]int64{1}[0]}, - {Key: "testTaint2", Value: "test2", Effect: v1.TaintEffectNoExecute, TolerationSeconds: &[]int64{100}[0]}, + Tolerations: []corev1.Toleration{ + {Key: "testTaint1", Value: "test1", Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &[]int64{1}[0]}, + {Key: "testTaint2", Value: "test2", Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &[]int64{100}[0]}, }, }, - Status: v1.PodStatus{ - Conditions: []v1.PodCondition{ + Status: corev1.PodStatus{ + Conditions: []corev1.PodCondition{ { - Type: v1.PodReady, - Status: v1.ConditionTrue, + Type: corev1.PodReady, + Status: corev1.ConditionTrue, }, }, }, @@ -499,8 +504,8 @@ func TestUpdateNode(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods}) - controller, _, nodeIndexer := setupNewNoExecuteTaintManager(ctx, fakeClientset) + fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods}) + controller, _, nodeIndexer := setupNewController(ctx, fakeClientset) nodeIndexer.Add(item.newNode) controller.recorder = testutil.NewFakeRecorder() go controller.Run(ctx) @@ -521,23 +526,23 @@ func TestUpdateNodeWithMultipleTaints(t *testing.T) { minute := int64(60) pod := testutil.NewPod("pod1", "node1") - pod.Spec.Tolerations = []v1.Toleration{ - {Key: taint1.Key, Operator: v1.TolerationOpExists, Effect: v1.TaintEffectNoExecute}, - {Key: taint2.Key, Operator: v1.TolerationOpExists, Effect: v1.TaintEffectNoExecute, TolerationSeconds: &minute}, + pod.Spec.Tolerations = []corev1.Toleration{ + {Key: taint1.Key, Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoExecute}, + {Key: taint2.Key, Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &minute}, } podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} untaintedNode := testutil.NewNode("node1") doubleTaintedNode := testutil.NewNode("node1") - doubleTaintedNode.Spec.Taints = []v1.Taint{taint1, taint2} + doubleTaintedNode.Spec.Taints = []corev1.Taint{taint1, taint2} singleTaintedNode := testutil.NewNode("node1") - singleTaintedNode.Spec.Taints = []v1.Taint{taint1} + singleTaintedNode.Spec.Taints = []corev1.Taint{taint1} ctx, cancel := context.WithCancel(context.TODO()) fakeClientset := fake.NewSimpleClientset(pod) - controller, 
_, nodeIndexer := setupNewNoExecuteTaintManager(ctx, fakeClientset) + controller, _, nodeIndexer := setupNewController(ctx, fakeClientset) controller.recorder = testutil.NewFakeRecorder() go controller.Run(ctx) @@ -585,14 +590,14 @@ func TestUpdateNodeWithMultipleTaints(t *testing.T) { func TestUpdateNodeWithMultiplePods(t *testing.T) { testCases := []struct { description string - pods []v1.Pod - oldNode *v1.Node - newNode *v1.Node + pods []corev1.Pod + oldNode *corev1.Node + newNode *corev1.Node expectedDeleteTimes durationSlice }{ { description: "Pods with different toleration times are evicted appropriately", - pods: []v1.Pod{ + pods: []corev1.Pod{ *testutil.NewPod("pod1", "node1"), *addToleration(testutil.NewPod("pod2", "node1"), 1, 1), *addToleration(testutil.NewPod("pod3", "node1"), 1, -1), @@ -606,7 +611,7 @@ func TestUpdateNodeWithMultiplePods(t *testing.T) { }, { description: "Evict all pods not matching all taints instantly", - pods: []v1.Pod{ + pods: []corev1.Pod{ *testutil.NewPod("pod1", "node1"), *addToleration(testutil.NewPod("pod2", "node1"), 1, 1), *addToleration(testutil.NewPod("pod3", "node1"), 1, -1), @@ -625,9 +630,9 @@ func TestUpdateNodeWithMultiplePods(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods}) + fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods}) sort.Sort(item.expectedDeleteTimes) - controller, _, nodeIndexer := setupNewNoExecuteTaintManager(ctx, fakeClientset) + controller, _, nodeIndexer := setupNewController(ctx, fakeClientset) nodeIndexer.Add(item.newNode) controller.recorder = testutil.NewFakeRecorder() go controller.Run(ctx) @@ -704,15 +709,15 @@ func TestGetMinTolerationTime(t *testing.T) { oneSec := 1 * time.Second tests := []struct { - tolerations []v1.Toleration + tolerations []corev1.Toleration expected time.Duration }{ { - tolerations: []v1.Toleration{}, + tolerations: []corev1.Toleration{}, expected: 0, }, { - tolerations: []v1.Toleration{ + tolerations: []corev1.Toleration{ { TolerationSeconds: nil, }, @@ -720,7 +725,7 @@ func TestGetMinTolerationTime(t *testing.T) { expected: -1, }, { - tolerations: []v1.Toleration{ + tolerations: []corev1.Toleration{ { TolerationSeconds: &one, }, @@ -732,7 +737,7 @@ func TestGetMinTolerationTime(t *testing.T) { }, { - tolerations: []v1.Toleration{ + tolerations: []corev1.Toleration{ { TolerationSeconds: &one, }, @@ -743,7 +748,7 @@ func TestGetMinTolerationTime(t *testing.T) { expected: oneSec, }, { - tolerations: []v1.Toleration{ + tolerations: []corev1.Toleration{ { TolerationSeconds: nil, }, @@ -770,17 +775,17 @@ func TestGetMinTolerationTime(t *testing.T) { func TestEventualConsistency(t *testing.T) { testCases := []struct { description string - pods []v1.Pod - prevPod *v1.Pod - newPod *v1.Pod - oldNode *v1.Node - newNode *v1.Node + pods []corev1.Pod + prevPod *corev1.Pod + newPod *corev1.Pod + oldNode *corev1.Node + newNode *corev1.Node expectPatch bool expectDelete bool }{ { description: "existing pod2 scheduled onto tainted Node", - pods: []v1.Pod{ + pods: []corev1.Pod{ *testutil.NewPod("pod1", "node1"), }, prevPod: testutil.NewPod("pod2", ""), @@ -792,7 +797,7 @@ func TestEventualConsistency(t *testing.T) { }, { description: "existing pod2 with taint toleration scheduled onto tainted Node", - pods: []v1.Pod{ + pods: []corev1.Pod{ *testutil.NewPod("pod1", "node1"), }, prevPod: addToleration(testutil.NewPod("pod2", ""), 1, 100), @@ -804,7 +809,7 @@ func 
TestEventualConsistency(t *testing.T) { }, { description: "new pod2 created on tainted Node", - pods: []v1.Pod{ + pods: []corev1.Pod{ *testutil.NewPod("pod1", "node1"), }, prevPod: nil, @@ -816,7 +821,7 @@ func TestEventualConsistency(t *testing.T) { }, { description: "new pod2 with tait toleration created on tainted Node", - pods: []v1.Pod{ + pods: []corev1.Pod{ *testutil.NewPod("pod1", "node1"), }, prevPod: nil, @@ -833,8 +838,8 @@ func TestEventualConsistency(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods}) - controller, podIndexer, nodeIndexer := setupNewNoExecuteTaintManager(ctx, fakeClientset) + fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods}) + controller, podIndexer, nodeIndexer := setupNewController(ctx, fakeClientset) nodeIndexer.Add(item.newNode) controller.recorder = testutil.NewFakeRecorder() go controller.Run(ctx) @@ -899,19 +904,19 @@ func TestPodDeletionEvent(t *testing.T) { } t.Run("emitPodDeletionEvent", func(t *testing.T) { - controller := &NoExecuteTaintManager{} + controller := &Controller{} recorder := testutil.NewFakeRecorder() controller.recorder = recorder controller.emitPodDeletionEvent(types.NamespacedName{ Name: "test", Namespace: "test", }) - want := []*v1.Event{ + want := []*corev1.Event{ { ObjectMeta: metav1.ObjectMeta{ Namespace: "test", }, - InvolvedObject: v1.ObjectReference{ + InvolvedObject: corev1.ObjectReference{ Kind: "Pod", APIVersion: "v1", Namespace: "test", @@ -921,7 +926,7 @@ func TestPodDeletionEvent(t *testing.T) { Type: "Normal", Count: 1, Message: "Marking for deletion Pod test/test", - Source: v1.EventSource{Component: "nodeControllerTest"}, + Source: corev1.EventSource{Component: "nodeControllerTest"}, }, } if diff := cmp.Diff(want, recorder.Events, cmp.FilterPath(f, cmp.Ignore())); len(diff) > 0 { @@ -930,19 +935,19 @@ func TestPodDeletionEvent(t *testing.T) { }) t.Run("emitCancelPodDeletionEvent", func(t *testing.T) { - controller := &NoExecuteTaintManager{} + controller := &Controller{} recorder := testutil.NewFakeRecorder() controller.recorder = recorder controller.emitCancelPodDeletionEvent(types.NamespacedName{ Name: "test", Namespace: "test", }) - want := []*v1.Event{ + want := []*corev1.Event{ { ObjectMeta: metav1.ObjectMeta{ Namespace: "test", }, - InvolvedObject: v1.ObjectReference{ + InvolvedObject: corev1.ObjectReference{ Kind: "Pod", APIVersion: "v1", Namespace: "test", @@ -952,7 +957,7 @@ func TestPodDeletionEvent(t *testing.T) { Type: "Normal", Count: 1, Message: "Cancelling deletion of Pod test/test", - Source: v1.EventSource{Component: "nodeControllerTest"}, + Source: corev1.EventSource{Component: "nodeControllerTest"}, }, } if diff := cmp.Diff(want, recorder.Events, cmp.FilterPath(f, cmp.Ignore())); len(diff) > 0 { diff --git a/pkg/controller/nodelifecycle/scheduler/timed_workers.go b/pkg/controller/tainteviction/timed_workers.go similarity index 85% rename from pkg/controller/nodelifecycle/scheduler/timed_workers.go rename to pkg/controller/tainteviction/timed_workers.go index 0024a0c7c50..8260c86ff36 100644 --- a/pkg/controller/nodelifecycle/scheduler/timed_workers.go +++ b/pkg/controller/tainteviction/timed_workers.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package scheduler +package tainteviction import ( "context" @@ -38,7 +38,9 @@ func (w *WorkArgs) KeyFromWorkArgs() string { // NewWorkArgs is a helper function to create new `WorkArgs` func NewWorkArgs(name, namespace string) *WorkArgs { - return &WorkArgs{types.NamespacedName{Namespace: namespace, Name: name}} + return &WorkArgs{ + NamespacedName: types.NamespacedName{Namespace: namespace, Name: name}, + } } // TimedWorker is a responsible for executing a function no earlier than at FireAt time. @@ -50,13 +52,13 @@ type TimedWorker struct { } // createWorker creates a TimedWorker that will execute `f` not earlier than `fireAt`. -func createWorker(ctx context.Context, args *WorkArgs, createdAt time.Time, fireAt time.Time, f func(ctx context.Context, args *WorkArgs) error, clock clock.WithDelayedExecution) *TimedWorker { +func createWorker(ctx context.Context, args *WorkArgs, createdAt time.Time, fireAt time.Time, f func(ctx context.Context, fireAt time.Time, args *WorkArgs) error, clock clock.WithDelayedExecution) *TimedWorker { delay := fireAt.Sub(createdAt) logger := klog.FromContext(ctx) fWithErrorLogging := func() { - err := f(ctx, args) + err := f(ctx, fireAt, args) if err != nil { - logger.Error(err, "NodeLifecycle: timed worker failed") + logger.Error(err, "TaintEvictionController: timed worker failed") } } if delay <= 0 { @@ -84,13 +86,13 @@ type TimedWorkerQueue struct { sync.Mutex // map of workers keyed by string returned by 'KeyFromWorkArgs' from the given worker. workers map[string]*TimedWorker - workFunc func(ctx context.Context, args *WorkArgs) error + workFunc func(ctx context.Context, fireAt time.Time, args *WorkArgs) error clock clock.WithDelayedExecution } // CreateWorkerQueue creates a new TimedWorkerQueue for workers that will execute // given function `f`. -func CreateWorkerQueue(f func(ctx context.Context, args *WorkArgs) error) *TimedWorkerQueue { +func CreateWorkerQueue(f func(ctx context.Context, fireAt time.Time, args *WorkArgs) error) *TimedWorkerQueue { return &TimedWorkerQueue{ workers: make(map[string]*TimedWorker), workFunc: f, @@ -98,9 +100,9 @@ func CreateWorkerQueue(f func(ctx context.Context, args *WorkArgs) error) *Timed } } -func (q *TimedWorkerQueue) getWrappedWorkerFunc(key string) func(ctx context.Context, args *WorkArgs) error { - return func(ctx context.Context, args *WorkArgs) error { - err := q.workFunc(ctx, args) +func (q *TimedWorkerQueue) getWrappedWorkerFunc(key string) func(ctx context.Context, fireAt time.Time, args *WorkArgs) error { + return func(ctx context.Context, fireAt time.Time, args *WorkArgs) error { + err := q.workFunc(ctx, fireAt, args) q.Lock() defer q.Unlock() if err == nil { diff --git a/pkg/controller/nodelifecycle/scheduler/timed_workers_test.go b/pkg/controller/tainteviction/timed_workers_test.go similarity index 92% rename from pkg/controller/nodelifecycle/scheduler/timed_workers_test.go rename to pkg/controller/tainteviction/timed_workers_test.go index 61e96713891..389ccd3ceb2 100644 --- a/pkg/controller/nodelifecycle/scheduler/timed_workers_test.go +++ b/pkg/controller/tainteviction/timed_workers_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
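Review note: the signature change above threads the scheduled fire time into the work function, which is what lets deletePodHandler observe eviction latency. A condensed, standalone sketch of the timed-worker idea (clock injection and cancellation are omitted; the names are mine, not the PR's):

```go
package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

// runAt executes f no earlier than fireAt, passing fireAt along exactly like
// the reworked createWorker does, so f can measure how late it actually ran.
func runAt(ctx context.Context, fireAt time.Time, f func(ctx context.Context, fireAt time.Time)) *time.Timer {
	delay := time.Until(fireAt)
	if delay <= 0 {
		f(ctx, fireAt) // already due: run synchronously, no timer needed
		return nil
	}
	return time.AfterFunc(delay, func() { f(ctx, fireAt) })
}

func main() {
	var wg sync.WaitGroup
	wg.Add(1)
	runAt(context.Background(), time.Now().Add(50*time.Millisecond),
		func(ctx context.Context, fireAt time.Time) {
			defer wg.Done()
			fmt.Printf("fired %v after the scheduled time\n", time.Since(fireAt).Round(time.Millisecond))
		})
	wg.Wait()
}
```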
*/ -package scheduler +package tainteviction import ( "context" @@ -31,7 +31,7 @@ func TestExecute(t *testing.T) { testVal := int32(0) wg := sync.WaitGroup{} wg.Add(5) - queue := CreateWorkerQueue(func(ctx context.Context, args *WorkArgs) error { + queue := CreateWorkerQueue(func(ctx context.Context, fireAt time.Time, args *WorkArgs) error { atomic.AddInt32(&testVal, 1) wg.Done() return nil @@ -59,7 +59,7 @@ func TestExecuteDelayed(t *testing.T) { testVal := int32(0) wg := sync.WaitGroup{} wg.Add(5) - queue := CreateWorkerQueue(func(ctx context.Context, args *WorkArgs) error { + queue := CreateWorkerQueue(func(ctx context.Context, fireAt time.Time, args *WorkArgs) error { atomic.AddInt32(&testVal, 1) wg.Done() return nil @@ -90,7 +90,7 @@ func TestCancel(t *testing.T) { testVal := int32(0) wg := sync.WaitGroup{} wg.Add(3) - queue := CreateWorkerQueue(func(ctx context.Context, args *WorkArgs) error { + queue := CreateWorkerQueue(func(ctx context.Context, fireAt time.Time, args *WorkArgs) error { atomic.AddInt32(&testVal, 1) wg.Done() return nil @@ -124,7 +124,7 @@ func TestCancelAndReadd(t *testing.T) { testVal := int32(0) wg := sync.WaitGroup{} wg.Add(4) - queue := CreateWorkerQueue(func(ctx context.Context, args *WorkArgs) error { + queue := CreateWorkerQueue(func(ctx context.Context, fireAt time.Time, args *WorkArgs) error { atomic.AddInt32(&testVal, 1) wg.Done() return nil diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 571047be6e5..c123e3803cb 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -729,6 +729,13 @@ const ( // https://github.com/kubernetes/kubernetes/issues/111516 SecurityContextDeny featuregate.Feature = "SecurityContextDeny" + // owner: @atosatto @yuanchen8911 + // kep: http://kep.k8s.io/3902 + // beta: v1.29 + // + // Decouples Taint Eviction Controller, performing taint-based Pod eviction, from Node Lifecycle Controller. 
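Review note: both the unit and the integration tests in this PR flip the new gate with featuregatetesting, and the defer pattern keeps the override from leaking across tests. A sketch of the pattern, assuming a test that needs the gate forced off:

```go
package tainteviction_test

import (
	"testing"

	utilfeature "k8s.io/apiserver/pkg/util/feature"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	"k8s.io/kubernetes/pkg/features"
)

func TestWithSeparateTaintEvictionControllerDisabled(t *testing.T) {
	// Forces the gate off for this test; the returned func restores the
	// previous value when the test ends.
	defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate,
		features.SeparateTaintEvictionController, false)()

	if utilfeature.DefaultFeatureGate.Enabled(features.SeparateTaintEvictionController) {
		t.Fatal("expected the SeparateTaintEvictionController gate to be off")
	}
}
```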
+ SeparateTaintEvictionController featuregate.Feature = "SeparateTaintEvictionController" + // owner: @xuzhenglun // kep: http://kep.k8s.io/3682 // alpha: v1.27 @@ -1093,6 +1100,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS SecurityContextDeny: {Default: false, PreRelease: featuregate.Alpha}, + SeparateTaintEvictionController: {Default: true, PreRelease: featuregate.Beta}, + ServiceNodePortStaticSubrange: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // GA in 1.29; remove in 1.31 SidecarContainers: {Default: false, PreRelease: featuregate.Alpha}, diff --git a/test/integration/node/lifecycle_test.go b/test/integration/node/lifecycle_test.go index f3825d6c1a4..ca9bfa60811 100644 --- a/test/integration/node/lifecycle_test.go +++ b/test/integration/node/lifecycle_test.go @@ -32,8 +32,10 @@ import ( restclient "k8s.io/client-go/rest" featuregatetesting "k8s.io/component-base/featuregate/testing" "k8s.io/klog/v2" + "k8s.io/kubernetes/cmd/kube-controller-manager/names" podutil "k8s.io/kubernetes/pkg/api/v1/pod" "k8s.io/kubernetes/pkg/controller/nodelifecycle" + "k8s.io/kubernetes/pkg/controller/tainteviction" "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/plugin/pkg/admission/defaulttolerationseconds" "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction" @@ -49,13 +51,34 @@ func TestEvictionForNoExecuteTaintAddedByUser(t *testing.T) { nodeIndex := 1 // the exact node doesn't matter, pick one tests := map[string]struct { - enablePodDisruptionConditions bool + enablePodDisruptionConditions bool + enableSeparateTaintEvictionController bool + startStandaloneTaintEvictionController bool + wantPodEvicted bool }{ - "Test eviciton for NoExecute taint added by user; pod condition added when PodDisruptionConditions enabled": { - enablePodDisruptionConditions: true, + "Test eviction for NoExecute taint added by user; pod condition added when PodDisruptionConditions enabled; separate taint eviction controller disabled": { + enablePodDisruptionConditions: true, + enableSeparateTaintEvictionController: false, + startStandaloneTaintEvictionController: false, + wantPodEvicted: true, }, - "Test eviciton for NoExecute taint added by user; no pod condition added when PodDisruptionConditions disabled": { - enablePodDisruptionConditions: false, + "Test eviction for NoExecute taint added by user; no pod condition added when PodDisruptionConditions disabled; separate taint eviction controller disabled": { + enablePodDisruptionConditions: false, + enableSeparateTaintEvictionController: false, + startStandaloneTaintEvictionController: false, + wantPodEvicted: true, + }, + "Test eviction for NoExecute taint added by user; separate taint eviction controller enabled but not started": { + enablePodDisruptionConditions: false, + enableSeparateTaintEvictionController: true, + startStandaloneTaintEvictionController: false, + wantPodEvicted: false, + }, + "Test eviction for NoExecute taint added by user; separate taint eviction controller enabled and started": { + enablePodDisruptionConditions: false, + enableSeparateTaintEvictionController: true, + startStandaloneTaintEvictionController: true, + wantPodEvicted: true, }, } @@ -102,6 +125,7 @@ func TestEvictionForNoExecuteTaintAddedByUser(t *testing.T) { } defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)() + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, 
features.SeparateTaintEvictionController, test.enableSeparateTaintEvictionController)() testCtx := testutils.InitTestAPIServer(t, "taint-no-execute", nil) cs := testCtx.ClientSet @@ -138,6 +162,18 @@ func TestEvictionForNoExecuteTaintAddedByUser(t *testing.T) { // Run all controllers go nc.Run(testCtx.Ctx) + // Start the standalone TaintEvictionController + if test.startStandaloneTaintEvictionController { + tm, _ := tainteviction.New( + testCtx.Ctx, + testCtx.ClientSet, + externalInformers.Core().V1().Pods(), + externalInformers.Core().V1().Nodes(), + names.TaintEvictionController, + ) + go tm.Run(testCtx.Ctx) + } + for index := range nodes { nodes[index], err = cs.CoreV1().Nodes().Create(testCtx.Ctx, nodes[index], metav1.CreateOptions{}) if err != nil { @@ -155,9 +191,12 @@ func TestEvictionForNoExecuteTaintAddedByUser(t *testing.T) { } err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, time.Second*20, true, testutils.PodIsGettingEvicted(cs, testPod.Namespace, testPod.Name)) - if err != nil { - t.Fatalf("Error %q in test %q when waiting for terminating pod: %q", err, name, klog.KObj(testPod)) + if err != nil && test.wantPodEvicted { + t.Fatalf("Test Failed: error %v while waiting for pod %q to be evicted", err, klog.KObj(testPod)) + } else if !wait.Interrupted(err) && !test.wantPodEvicted { + t.Fatalf("Test Failed: unexpected eviction of pod %q", klog.KObj(testPod)) } + testPod, err = cs.CoreV1().Pods(testCtx.NS.Name).Get(testCtx.Ctx, testPod.Name, metav1.GetOptions{}) if err != nil { t.Fatalf("Test Failed: error: %q, while getting updated pod", err) @@ -196,23 +235,34 @@ func TestTaintBasedEvictions(t *testing.T) { }, } tests := []struct { - name string - nodeTaints []v1.Taint - nodeConditions []v1.NodeCondition - pod *v1.Pod - tolerationSeconds int64 - expectedWaitForPodCondition string + name string + nodeTaints []v1.Taint + nodeConditions []v1.NodeCondition + pod *v1.Pod + tolerationSeconds int64 + expectedWaitForPodCondition string + enableSeparateTaintEvictionController bool }{ { - name: "Taint based evictions for NodeNotReady and 200 tolerationseconds", - nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}}, - nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}, - pod: testPod.DeepCopy(), - tolerationSeconds: 200, - expectedWaitForPodCondition: "updated with tolerationSeconds of 200", + name: "Taint based evictions for NodeNotReady and 200 tolerationseconds; separate taint eviction controller disabled", + nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}}, + nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}, + pod: testPod.DeepCopy(), + tolerationSeconds: 200, + expectedWaitForPodCondition: "updated with tolerationSeconds of 200", + enableSeparateTaintEvictionController: false, }, { - name: "Taint based evictions for NodeNotReady with no pod tolerations", + name: "Taint based evictions for NodeNotReady and 200 tolerationseconds; separate taint eviction controller enabled", + nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}}, + nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}, + pod: testPod.DeepCopy(), + tolerationSeconds: 200, + expectedWaitForPodCondition: "updated with tolerationSeconds of 200", + enableSeparateTaintEvictionController: true, + }, + { + name: "Taint based evictions for NodeNotReady with no pod tolerations; separate taint eviction controller disabled", nodeTaints: []v1.Taint{{Key:
v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}}, nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}, pod: &v1.Pod{ @@ -223,21 +273,55 @@ func TestTaintBasedEvictions(t *testing.T) { }, }, }, - tolerationSeconds: 300, - expectedWaitForPodCondition: "updated with tolerationSeconds=300", + tolerationSeconds: 300, + expectedWaitForPodCondition: "updated with tolerationSeconds=300", + enableSeparateTaintEvictionController: false, }, { - name: "Taint based evictions for NodeNotReady and 0 tolerationseconds", - nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}}, - nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}, - pod: testPod.DeepCopy(), - tolerationSeconds: 0, - expectedWaitForPodCondition: "terminating", + name: "Taint based evictions for NodeNotReady with no pod tolerations; separate taint eviction controller enabled", + nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}}, + nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}, + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "testpod1"}, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + {Name: "container", Image: imageutils.GetPauseImageName()}, + }, + }, + }, + tolerationSeconds: 300, + expectedWaitForPodCondition: "updated with tolerationSeconds=300", + enableSeparateTaintEvictionController: true, }, { - name: "Taint based evictions for NodeUnreachable", - nodeTaints: []v1.Taint{{Key: v1.TaintNodeUnreachable, Effect: v1.TaintEffectNoExecute}}, - nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionUnknown}}, + name: "Taint based evictions for NodeNotReady and 0 tolerationseconds; separate taint eviction controller disabled", + nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}}, + nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}, + pod: testPod.DeepCopy(), + tolerationSeconds: 0, + expectedWaitForPodCondition: "terminating", + enableSeparateTaintEvictionController: false, + }, + { + name: "Taint based evictions for NodeNotReady and 0 tolerationseconds; separate taint eviction controller enabled", + nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}}, + nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}, + pod: testPod.DeepCopy(), + tolerationSeconds: 0, + expectedWaitForPodCondition: "terminating", + enableSeparateTaintEvictionController: true, + }, + { + name: "Taint based evictions for NodeUnreachable; separate taint eviction controller disabled", + nodeTaints: []v1.Taint{{Key: v1.TaintNodeUnreachable, Effect: v1.TaintEffectNoExecute}}, + nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionUnknown}}, + enableSeparateTaintEvictionController: false, + }, + { + name: "Taint based evictions for NodeUnreachable; separate taint eviction controller enabled", + nodeTaints: []v1.Taint{{Key: v1.TaintNodeUnreachable, Effect: v1.TaintEffectNoExecute}}, + nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionUnknown}}, + enableSeparateTaintEvictionController: true, }, } @@ -249,6 +333,8 @@ func TestTaintBasedEvictions(t *testing.T) { ) for _, test := range tests { t.Run(test.name, func(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.SeparateTaintEvictionController, test.enableSeparateTaintEvictionController)() + 
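Each case in the table above flips the gate before the test's API server and controllers come up, because ownership of taint-based eviction is decided once, at controller startup. A hedged sketch of the kind of check the gate drives — the helper and its package placement are hypothetical, but utilfeature.DefaultFeatureGate.Enabled and the features constant are the real APIs this patch wires together:

    package nodelifecycle // hypothetical placement, for illustration only

    import (
        utilfeature "k8s.io/apiserver/pkg/util/feature"
        "k8s.io/kubernetes/pkg/features"
    )

    // runInProcessTaintManager sketches the split the gate introduces: exactly
    // one of the two controllers should perform NoExecute evictions, otherwise
    // the same pod could be handled by two eviction paths at once.
    func runInProcessTaintManager() bool {
        return !utilfeature.DefaultFeatureGate.Enabled(features.SeparateTaintEvictionController)
    }

Outside of tests the same gate is driven by the controller-manager command line, e.g. --feature-gates=SeparateTaintEvictionController=false restores the in-process taint manager for as long as the gate remains Beta.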
testCtx := testutils.InitTestAPIServer(t, "taint-based-evictions", admission) // Build clientset and informers for controllers. @@ -288,6 +374,18 @@ func TestTaintBasedEvictions(t *testing.T) { // Run the controller go nc.Run(testCtx.Ctx) + // Start TaintEvictionController + if test.enableSeparateTaintEvictionController { + tm, _ := tainteviction.New( + testCtx.Ctx, + testCtx.ClientSet, + externalInformers.Core().V1().Pods(), + externalInformers.Core().V1().Nodes(), + names.TaintEvictionController, + ) + go tm.Run(testCtx.Ctx) + } + nodeRes := v1.ResourceList{ v1.ResourceCPU: resource.MustParse("4000m"), v1.ResourceMemory: resource.MustParse("16Gi"),