From 3c0bc26d909c1f71151b99d1a85bc45f6d5a3ff1 Mon Sep 17 00:00:00 2001 From: Madhav Jivrajani Date: Thu, 2 Jun 2022 22:00:08 +0530 Subject: [PATCH] controller/nodelifecycle: Refactor to not make API calls under lock The evictorLock only protects zonePodEvictor and zoneNoExecuteTainter. processTaintBaseEviction showed indications of increased lock contention among goroutines (see issue 110341 for more details). The refactor done is to ensure that all codepaths in that function that hold the evictorLock AND make API calls under the lock, are now making API calls outside the lock and the lock is held only for accessing either zonePodEvictor or zoneNoExecuteTainter or both. Two other places where the refactor was done is the doEvictionPass and doNoExecuteTaintingPass functions which make multiple API calls under the evictorLock. Signed-off-by: Madhav Jivrajani --- .../node_lifecycle_controller.go | 76 +++++++++++++++---- 1 file changed, 61 insertions(+), 15 deletions(-) diff --git a/pkg/controller/nodelifecycle/node_lifecycle_controller.go b/pkg/controller/nodelifecycle/node_lifecycle_controller.go index cfb07d1872d..0b0bb0960f6 100644 --- a/pkg/controller/nodelifecycle/node_lifecycle_controller.go +++ b/pkg/controller/nodelifecycle/node_lifecycle_controller.go @@ -278,7 +278,6 @@ type Controller struct { nodeHealthMap *nodeHealthMap // evictorLock protects zonePodEvictor and zoneNoExecuteTainter. - // TODO(#83954): API calls shouldn't be executed under the lock. evictorLock sync.Mutex nodeEvictionMap *nodeEvictionMap // workers that evicts pods from unresponsive nodes. 
@@ -664,11 +663,32 @@ func (nc *Controller) doNoScheduleTaintingPass(ctx context.Context, nodeName str } func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) { - nc.evictorLock.Lock() - defer nc.evictorLock.Unlock() - for k := range nc.zoneNoExecuteTainter { + // Extract out the keys of the map in order to not hold + // the evictorLock for the entire function and hold it + // only when necessary. + var zoneNoExecuteTainterKeys []string + func() { + nc.evictorLock.Lock() + defer nc.evictorLock.Unlock() + + zoneNoExecuteTainterKeys = make([]string, 0, len(nc.zoneNoExecuteTainter)) + for k := range nc.zoneNoExecuteTainter { + zoneNoExecuteTainterKeys = append(zoneNoExecuteTainterKeys, k) + } + }() + for _, k := range zoneNoExecuteTainterKeys { + var zoneNoExecuteTainterWorker *scheduler.RateLimitedTimedQueue + func() { + nc.evictorLock.Lock() + defer nc.evictorLock.Unlock() + // Extracting the value without checking if the key + // exists or not is safe to do here since zones do + // not get removed, and consequently pod evictors for + // these zones also do not get removed, only added. + zoneNoExecuteTainterWorker = nc.zoneNoExecuteTainter[k] + }() // Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded). 
- nc.zoneNoExecuteTainter[k].Try(func(value scheduler.TimedValue) (bool, time.Duration) { + zoneNoExecuteTainterWorker.Try(func(value scheduler.TimedValue) (bool, time.Duration) { node, err := nc.nodeLister.Get(value.Value) if apierrors.IsNotFound(err) { klog.Warningf("Node %v no longer present in nodeLister!", value.Value) @@ -708,11 +728,34 @@ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) { } func (nc *Controller) doEvictionPass(ctx context.Context) { - nc.evictorLock.Lock() - defer nc.evictorLock.Unlock() - for k := range nc.zonePodEvictor { + // Extract out the keys of the map in order to not hold + // the evictorLock for the entire function and hold it + // only when necessary. + var zonePodEvictorKeys []string + func() { + nc.evictorLock.Lock() + defer nc.evictorLock.Unlock() + + zonePodEvictorKeys = make([]string, 0, len(nc.zonePodEvictor)) + for k := range nc.zonePodEvictor { + zonePodEvictorKeys = append(zonePodEvictorKeys, k) + } + }() + + for _, k := range zonePodEvictorKeys { + var zonePodEvictionWorker *scheduler.RateLimitedTimedQueue + func() { + nc.evictorLock.Lock() + defer nc.evictorLock.Unlock() + // Extracting the value without checking if the key + // exists or not is safe to do here since zones do + // not get removed, and consequently pod evictors for + // these zones also do not get removed, only added. + zonePodEvictionWorker = nc.zonePodEvictor[k] + }() + // Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded). - nc.zonePodEvictor[k].Try(func(value scheduler.TimedValue) (bool, time.Duration) { + zonePodEvictionWorker.Try(func(value scheduler.TimedValue) (bool, time.Duration) { node, err := nc.nodeLister.Get(value.Value) if apierrors.IsNotFound(err) { klog.Warningf("Node %v no longer present in nodeLister!", value.Value) @@ -1406,11 +1449,11 @@ func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) { // returns true if an eviction was queued. 
func (nc *Controller) cancelPodEviction(node *v1.Node) bool { zone := nodetopology.GetZoneKey(node) - nc.evictorLock.Lock() - defer nc.evictorLock.Unlock() if !nc.nodeEvictionMap.setStatus(node.Name, unmarked) { klog.V(2).Infof("node %v was unregistered in the meantime - skipping setting status", node.Name) } + nc.evictorLock.Lock() + defer nc.evictorLock.Unlock() wasDeleting := nc.zonePodEvictor[zone].Remove(node.Name) if wasDeleting { klog.V(2).Infof("Cancelling pod Eviction on Node: %v", node.Name) @@ -1425,8 +1468,6 @@ func (nc *Controller) cancelPodEviction(node *v1.Node) bool { // - deletes pods immediately if node is already marked as evicted. // Returns false, because the node wasn't added to the queue. func (nc *Controller) evictPods(ctx context.Context, node *v1.Node, pods []*v1.Pod) (bool, error) { - nc.evictorLock.Lock() - defer nc.evictorLock.Unlock() status, ok := nc.nodeEvictionMap.getStatus(node.Name) if ok && status == evicted { // Node eviction already happened for this node. 
@@ -1440,6 +1481,10 @@ func (nc *Controller) evictPods(ctx context.Context, node *v1.Node, pods []*v1.P if !nc.nodeEvictionMap.setStatus(node.Name, toBeEvicted) { klog.V(2).Infof("node %v was unregistered in the meantime - skipping setting status", node.Name) } + + nc.evictorLock.Lock() + defer nc.evictorLock.Unlock() + return nc.zonePodEvictor[nodetopology.GetZoneKey(node)].Add(node.Name, string(node.UID)), nil } @@ -1462,8 +1507,6 @@ func (nc *Controller) markNodeForTainting(node *v1.Node, status v1.ConditionStat } func (nc *Controller) markNodeAsReachable(ctx context.Context, node *v1.Node) (bool, error) { - nc.evictorLock.Lock() - defer nc.evictorLock.Unlock() err := controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, UnreachableTaintTemplate) if err != nil { klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err) @@ -1474,6 +1517,9 @@ func (nc *Controller) markNodeAsReachable(ctx context.Context, node *v1.Node) (b klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err) return false, err } + nc.evictorLock.Lock() + defer nc.evictorLock.Unlock() + return nc.zoneNoExecuteTainter[nodetopology.GetZoneKey(node)].Remove(node.Name), nil }