Merge pull request #111084 from mimowo/retriable-pod-failures-refactor-taint-mngr

Refactor taint_manager to do not use getPod and getNode stubs
This commit is contained in:
Kubernetes Prow Robot 2022-07-19 06:54:06 -07:00 committed by GitHub
commit ddeb3ab90b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 45 additions and 86 deletions

View File

@ -475,12 +475,10 @@ func NewNodeLifecycleController(
return pods, nil
}
nc.podLister = podInformer.Lister()
nc.nodeLister = nodeInformer.Lister()
if nc.runTaintManager {
podGetter := func(name, namespace string) (*v1.Pod, error) { return nc.podLister.Pods(namespace).Get(name) }
nodeLister := nodeInformer.Lister()
nodeGetter := func(name string) (*v1.Node, error) { return nodeLister.Get(name) }
nc.taintManager = scheduler.NewNoExecuteTaintManager(ctx, kubeClient, podGetter, nodeGetter, nc.getPodsAssignedToNode)
nc.taintManager = scheduler.NewNoExecuteTaintManager(ctx, kubeClient, nc.podLister, nc.nodeLister, nc.getPodsAssignedToNode)
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
nc.taintManager.NodeUpdated(nil, node)
@ -518,7 +516,6 @@ func NewNodeLifecycleController(
nc.leaseLister = leaseInformer.Lister()
nc.leaseInformerSynced = leaseInformer.Informer().HasSynced
nc.nodeLister = nodeInformer.Lister()
nc.nodeInformerSynced = nodeInformer.Informer().HasSynced
nc.daemonSetStore = daemonSetInformer.Lister()

View File

@ -33,6 +33,7 @@ import (
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/workqueue"
"k8s.io/kubernetes/pkg/apis/core/helper"
@ -69,12 +70,6 @@ func hash(val string, max int) int {
return int(hasher.Sum32() % uint32(max))
}
// GetPodFunc returns the pod for the specified name/namespace, or a NotFound error if missing.
type GetPodFunc func(name, namespace string) (*v1.Pod, error)
// GetNodeFunc returns the node for the specified name, or a NotFound error if missing.
type GetNodeFunc func(name string) (*v1.Node, error)
// GetPodsByNodeNameFunc returns the list of pods assigned to the specified node.
type GetPodsByNodeNameFunc func(nodeName string) ([]*v1.Pod, error)
@ -84,8 +79,8 @@ type NoExecuteTaintManager struct {
client clientset.Interface
broadcaster record.EventBroadcaster
recorder record.EventRecorder
getPod GetPodFunc
getNode GetNodeFunc
podLister corelisters.PodLister
nodeLister corelisters.NodeLister
getPodsAssignedToNode GetPodsByNodeNameFunc
taintEvictionQueue *TimedWorkerQueue
@ -156,7 +151,7 @@ func getMinTolerationTime(tolerations []v1.Toleration) time.Duration {
// NewNoExecuteTaintManager creates a new NoExecuteTaintManager that will use passed clientset to
// communicate with the API server.
func NewNoExecuteTaintManager(ctx context.Context, c clientset.Interface, getPod GetPodFunc, getNode GetNodeFunc, getPodsAssignedToNode GetPodsByNodeNameFunc) *NoExecuteTaintManager {
func NewNoExecuteTaintManager(ctx context.Context, c clientset.Interface, podLister corelisters.PodLister, nodeLister corelisters.NodeLister, getPodsAssignedToNode GetPodsByNodeNameFunc) *NoExecuteTaintManager {
eventBroadcaster := record.NewBroadcaster()
recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "taint-controller"})
@ -164,8 +159,8 @@ func NewNoExecuteTaintManager(ctx context.Context, c clientset.Interface, getPod
client: c,
broadcaster: eventBroadcaster,
recorder: recorder,
getPod: getPod,
getNode: getNode,
podLister: podLister,
nodeLister: nodeLister,
getPodsAssignedToNode: getPodsAssignedToNode,
taintedNodes: make(map[string][]v1.Taint),
@ -388,7 +383,7 @@ func (tc *NoExecuteTaintManager) processPodOnNode(
}
func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate podUpdateItem) {
pod, err := tc.getPod(podUpdate.podName, podUpdate.podNamespace)
pod, err := tc.podLister.Pods(podUpdate.podNamespace).Get(podUpdate.podName)
if err != nil {
if apierrors.IsNotFound(err) {
// Delete
@ -428,7 +423,7 @@ func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate
}
func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdate nodeUpdateItem) {
node, err := tc.getNode(nodeUpdate.nodeName)
node, err := tc.nodeLister.Get(nodeUpdate.nodeName)
if err != nil {
if apierrors.IsNotFound(err) {
// Delete

View File

@ -20,7 +20,6 @@ import (
"context"
"fmt"
"sort"
"sync"
"testing"
"time"
@ -28,21 +27,17 @@ import (
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
clienttesting "k8s.io/client-go/testing"
"k8s.io/client-go/tools/cache"
"k8s.io/kubernetes/pkg/controller/testutil"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clienttesting "k8s.io/client-go/testing"
)
var timeForControllerToProgress = 500 * time.Millisecond
func getPodFromClientset(clientset *fake.Clientset) GetPodFunc {
return func(name, namespace string) (*v1.Pod, error) {
return clientset.CoreV1().Pods(namespace).Get(context.TODO(), name, metav1.GetOptions{})
}
}
func getPodsAssignedToNode(c *fake.Clientset) GetPodsByNodeNameFunc {
return func(nodeName string) ([]*v1.Pod, error) {
selector := fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName})
@ -61,46 +56,6 @@ func getPodsAssignedToNode(c *fake.Clientset) GetPodsByNodeNameFunc {
}
}
func getNodeFromClientset(clientset *fake.Clientset) GetNodeFunc {
return func(name string) (*v1.Node, error) {
return clientset.CoreV1().Nodes().Get(context.TODO(), name, metav1.GetOptions{})
}
}
type podHolder struct {
pod *v1.Pod
sync.Mutex
}
func (p *podHolder) getPod(name, namespace string) (*v1.Pod, error) {
p.Lock()
defer p.Unlock()
return p.pod, nil
}
func (p *podHolder) setPod(pod *v1.Pod) {
p.Lock()
defer p.Unlock()
p.pod = pod
}
type nodeHolder struct {
lock sync.Mutex
node *v1.Node
}
func (n *nodeHolder) setNode(node *v1.Node) {
n.lock.Lock()
defer n.lock.Unlock()
n.node = node
}
func (n *nodeHolder) getNode(name string) (*v1.Node, error) {
n.lock.Lock()
defer n.lock.Unlock()
return n.node, nil
}
func createNoExecuteTaint(index int) v1.Taint {
now := metav1.Now()
return v1.Taint{
@ -133,6 +88,14 @@ func addTaintsToNode(node *v1.Node, key, value string, indices []int) *v1.Node {
return node
}
func setupNewNoExecuteTaintManager(ctx context.Context, fakeClientSet *fake.Clientset) (*NoExecuteTaintManager, cache.Indexer, cache.Indexer) {
informerFactory := informers.NewSharedInformerFactory(fakeClientSet, 0)
podIndexer := informerFactory.Core().V1().Pods().Informer().GetIndexer()
nodeIndexer := informerFactory.Core().V1().Nodes().Informer().GetIndexer()
mgr := NewNoExecuteTaintManager(ctx, fakeClientSet, informerFactory.Core().V1().Pods().Lister(), informerFactory.Core().V1().Nodes().Lister(), getPodsAssignedToNode(fakeClientSet))
return mgr, podIndexer, nodeIndexer
}
type timestampedPod struct {
names []string
timestamp time.Duration
@ -219,10 +182,12 @@ func TestCreatePod(t *testing.T) {
for _, item := range testCases {
ctx, cancel := context.WithCancel(context.Background())
fakeClientset := fake.NewSimpleClientset()
controller := NewNoExecuteTaintManager(ctx, fakeClientset, (&podHolder{pod: item.pod}).getPod, getNodeFromClientset(fakeClientset), getPodsAssignedToNode(fakeClientset))
controller, podIndexer, _ := setupNewNoExecuteTaintManager(ctx, fakeClientset)
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(ctx)
controller.taintedNodes = item.taintedNodes
podIndexer.Add(item.pod)
controller.PodUpdated(nil, item.pod)
// wait a bit
time.Sleep(timeForControllerToProgress)
@ -243,7 +208,7 @@ func TestCreatePod(t *testing.T) {
func TestDeletePod(t *testing.T) {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset()
controller := NewNoExecuteTaintManager(context.TODO(), fakeClientset, getPodFromClientset(fakeClientset), getNodeFromClientset(fakeClientset), getPodsAssignedToNode(fakeClientset))
controller, _, _ := setupNewNoExecuteTaintManager(context.TODO(), fakeClientset)
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(context.TODO())
controller.taintedNodes = map[string][]v1.Taint{
@ -306,17 +271,17 @@ func TestUpdatePod(t *testing.T) {
for _, item := range testCases {
ctx, cancel := context.WithCancel(context.Background())
fakeClientset := fake.NewSimpleClientset()
holder := &podHolder{}
controller := NewNoExecuteTaintManager(ctx, fakeClientset, holder.getPod, getNodeFromClientset(fakeClientset), getPodsAssignedToNode(fakeClientset))
controller, podIndexer, _ := setupNewNoExecuteTaintManager(context.TODO(), fakeClientset)
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(ctx)
controller.taintedNodes = item.taintedNodes
holder.setPod(item.prevPod)
podIndexer.Add(item.prevPod)
controller.PodUpdated(nil, item.prevPod)
fakeClientset.ClearActions()
time.Sleep(timeForControllerToProgress)
holder.setPod(item.newPod)
podIndexer.Update(item.newPod)
controller.PodUpdated(item.prevPod, item.newPod)
// wait a bit
time.Sleep(timeForControllerToProgress)
@ -373,7 +338,8 @@ func TestCreateNode(t *testing.T) {
for _, item := range testCases {
ctx, cancel := context.WithCancel(context.Background())
fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods})
controller := NewNoExecuteTaintManager(ctx, fakeClientset, getPodFromClientset(fakeClientset), (&nodeHolder{node: item.node}).getNode, getPodsAssignedToNode(fakeClientset))
controller, _, nodeIndexer := setupNewNoExecuteTaintManager(context.TODO(), fakeClientset)
nodeIndexer.Add(item.node)
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(ctx)
controller.NodeUpdated(nil, item.node)
@ -396,7 +362,7 @@ func TestCreateNode(t *testing.T) {
func TestDeleteNode(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
fakeClientset := fake.NewSimpleClientset()
controller := NewNoExecuteTaintManager(ctx, fakeClientset, getPodFromClientset(fakeClientset), getNodeFromClientset(fakeClientset), getPodsAssignedToNode(fakeClientset))
controller, _, _ := setupNewNoExecuteTaintManager(context.TODO(), fakeClientset)
controller.recorder = testutil.NewFakeRecorder()
controller.taintedNodes = map[string][]v1.Taint{
"node1": {createNoExecuteTaint(1)},
@ -494,7 +460,8 @@ func TestUpdateNode(t *testing.T) {
for _, item := range testCases {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods})
controller := NewNoExecuteTaintManager(context.TODO(), fakeClientset, getPodFromClientset(fakeClientset), (&nodeHolder{node: item.newNode}).getNode, getPodsAssignedToNode(fakeClientset))
controller, _, nodeIndexer := setupNewNoExecuteTaintManager(context.TODO(), fakeClientset)
nodeIndexer.Add(item.newNode)
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(context.TODO())
controller.NodeUpdated(item.oldNode, item.newNode)
@ -539,13 +506,12 @@ func TestUpdateNodeWithMultipleTaints(t *testing.T) {
ctx, cancel := context.WithCancel(context.TODO())
fakeClientset := fake.NewSimpleClientset(pod)
holder := &nodeHolder{node: untaintedNode}
controller := NewNoExecuteTaintManager(context.TODO(), fakeClientset, getPodFromClientset(fakeClientset), (holder).getNode, getPodsAssignedToNode(fakeClientset))
controller, _, nodeIndexer := setupNewNoExecuteTaintManager(context.TODO(), fakeClientset)
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(context.TODO())
// no taint
holder.setNode(untaintedNode)
nodeIndexer.Add(untaintedNode)
controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"})
// verify pod is not queued for deletion
if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil {
@ -553,7 +519,7 @@ func TestUpdateNodeWithMultipleTaints(t *testing.T) {
}
// no taint -> infinitely tolerated taint
holder.setNode(singleTaintedNode)
nodeIndexer.Update(singleTaintedNode)
controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"})
// verify pod is not queued for deletion
if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil {
@ -561,7 +527,7 @@ func TestUpdateNodeWithMultipleTaints(t *testing.T) {
}
// infinitely tolerated taint -> temporarily tolerated taint
holder.setNode(doubleTaintedNode)
nodeIndexer.Update(doubleTaintedNode)
controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"})
// verify pod is queued for deletion
if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) == nil {
@ -569,7 +535,7 @@ func TestUpdateNodeWithMultipleTaints(t *testing.T) {
}
// temporarily tolerated taint -> infinitely tolerated taint
holder.setNode(singleTaintedNode)
nodeIndexer.Update(singleTaintedNode)
controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"})
// verify pod is not queued for deletion
if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil {
@ -628,7 +594,8 @@ func TestUpdateNodeWithMultiplePods(t *testing.T) {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods})
sort.Sort(item.expectedDeleteTimes)
controller := NewNoExecuteTaintManager(context.TODO(), fakeClientset, getPodFromClientset(fakeClientset), (&nodeHolder{node: item.newNode}).getNode, getPodsAssignedToNode(fakeClientset))
controller, _, nodeIndexer := setupNewNoExecuteTaintManager(context.TODO(), fakeClientset)
nodeIndexer.Add(item.newNode)
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(context.TODO())
controller.NodeUpdated(item.oldNode, item.newNode)
@ -827,13 +794,13 @@ func TestEventualConsistency(t *testing.T) {
for _, item := range testCases {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods})
holder := &podHolder{}
controller := NewNoExecuteTaintManager(context.TODO(), fakeClientset, holder.getPod, (&nodeHolder{node: item.newNode}).getNode, getPodsAssignedToNode(fakeClientset))
controller, podIndexer, nodeIndexer := setupNewNoExecuteTaintManager(context.TODO(), fakeClientset)
nodeIndexer.Add(item.newNode)
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(context.TODO())
if item.prevPod != nil {
holder.setPod(item.prevPod)
podIndexer.Add(item.prevPod)
controller.PodUpdated(nil, item.prevPod)
}
@ -854,7 +821,7 @@ func TestEventualConsistency(t *testing.T) {
fakeClientset.ClearActions()
// And now the delayed update of 'pod2' comes to the TaintManager. We should delete it as well.
holder.setPod(item.newPod)
podIndexer.Update(item.newPod)
controller.PodUpdated(item.prevPod, item.newPod)
// wait a bit
time.Sleep(timeForControllerToProgress)