Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-23 03:41:45 +00:00)

Merge pull request #110359 from MadhavJivrajani/remove-api-call-under-lock

controller/nodelifecycle: Refactor to not make API calls under lock

commit a6afdf45dd
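
Editor's note: the whole diff below applies one pattern to five functions in the node lifecycle controller: stop holding evictorLock across slow work (API calls), and take the lock only long enough to snapshot or mutate the in-memory maps it guards. A minimal, hypothetical sketch of the shape of that change; the ctl type, queues field, and slowAPICall are illustrative stand-ins, not the controller's code:

    package main

    import "sync"

    // Illustrative stand-ins; not the controller's real types.
    type ctl struct {
        mu     sync.Mutex
        queues map[string]struct{} // guarded by mu
    }

    func (c *ctl) slowAPICall(k string) { /* e.g. a kube-apiserver round-trip */ }

    // Before: the lock is held across every API call, so one slow call
    // stalls every other goroutine contending for c.mu.
    func (c *ctl) passBefore() {
        c.mu.Lock()
        defer c.mu.Unlock()
        for k := range c.queues {
            c.slowAPICall(k)
        }
    }

    // After: hold the lock only to snapshot the keys, then do the slow
    // work with the lock released.
    func (c *ctl) passAfter() {
        var keys []string
        func() {
            c.mu.Lock()
            defer c.mu.Unlock()
            keys = make([]string, 0, len(c.queues))
            for k := range c.queues {
                keys = append(keys, k)
            }
        }()
        for _, k := range keys {
            c.slowAPICall(k)
        }
    }

    func main() {
        c := &ctl{queues: map[string]struct{}{"zone-a": {}}}
        c.passBefore()
        c.passAfter()
    }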
@@ -278,7 +278,6 @@ type Controller struct {
     nodeHealthMap *nodeHealthMap

     // evictorLock protects zonePodEvictor and zoneNoExecuteTainter.
-    // TODO(#83954): API calls shouldn't be executed under the lock.
     evictorLock sync.Mutex
     nodeEvictionMap *nodeEvictionMap
     // workers that evict pods from unresponsive nodes.
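The comment kept above is the lock's contract: evictorLock guards exactly zonePodEvictor and zoneNoExecuteTainter, and the TODO is deleted because the hunks below resolve it. A hedged sketch of that documentation convention, with stubbed types rather than the controller's definitions:

    package main

    import "sync"

    type queue struct{}

    // Sketch of the lock-documentation convention used in the struct;
    // the types here are stubs, not the controller's definitions.
    type controllerSketch struct {
        // evictorLock protects the two maps below and nothing else, so
        // work that touches neither map can run without holding it.
        evictorLock          sync.Mutex
        zonePodEvictor       map[string]*queue
        zoneNoExecuteTainter map[string]*queue
    }

    func main() { _ = controllerSketch{} }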
@@ -665,11 +664,32 @@ func (nc *Controller) doNoScheduleTaintingPass(ctx context.Context, nodeName str
 }

 func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
+    // Extract out the keys of the map in order to not hold
+    // the evictorLock for the entire function and hold it
+    // only when necessary.
+    var zoneNoExecuteTainterKeys []string
+    func() {
         nc.evictorLock.Lock()
         defer nc.evictorLock.Unlock()
+
+        zoneNoExecuteTainterKeys = make([]string, 0, len(nc.zoneNoExecuteTainter))
         for k := range nc.zoneNoExecuteTainter {
+            zoneNoExecuteTainterKeys = append(zoneNoExecuteTainterKeys, k)
+        }
+    }()
+    for _, k := range zoneNoExecuteTainterKeys {
+        var zoneNoExecuteTainterWorker *scheduler.RateLimitedTimedQueue
+        func() {
+            nc.evictorLock.Lock()
+            defer nc.evictorLock.Unlock()
+            // Extracting the value without checking if the key
+            // exists or not is safe to do here since zones do
+            // not get removed, and consequently pod evictors for
+            // these zones also do not get removed, only added.
+            zoneNoExecuteTainterWorker = nc.zoneNoExecuteTainter[k]
+        }()
         // Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded).
-        nc.zoneNoExecuteTainter[k].Try(func(value scheduler.TimedValue) (bool, time.Duration) {
+        zoneNoExecuteTainterWorker.Try(func(value scheduler.TimedValue) (bool, time.Duration) {
             node, err := nc.nodeLister.Get(value.Value)
             if apierrors.IsNotFound(err) {
                 klog.Warningf("Node %v no longer present in nodeLister!", value.Value)
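Go has no block-scoped lock statement, and a plain defer would hold the lock until doNoExecuteTaintingPass returns. Wrapping Lock/defer Unlock in an immediately invoked anonymous function bounds the critical section, which is the idiom used twice in the hunk above. A minimal, self-contained illustration:

    package main

    import (
        "fmt"
        "sync"
    )

    func main() {
        var mu sync.Mutex
        counts := map[string]int{}

        // The deferred Unlock runs when this anonymous function returns,
        // not at the end of main, so the lock is scoped to this block.
        func() {
            mu.Lock()
            defer mu.Unlock()
            counts["zone-a"]++
        }()

        // mu is already released here; slow work done now does not
        // block other goroutines that need the lock.
        fmt.Println(counts["zone-a"])
    }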
@@ -709,11 +729,34 @@ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
 }

 func (nc *Controller) doEvictionPass(ctx context.Context) {
+    // Extract out the keys of the map in order to not hold
+    // the evictorLock for the entire function and hold it
+    // only when necessary.
+    var zonePodEvictorKeys []string
+    func() {
         nc.evictorLock.Lock()
         defer nc.evictorLock.Unlock()
+
+        zonePodEvictorKeys = make([]string, 0, len(nc.zonePodEvictor))
         for k := range nc.zonePodEvictor {
+            zonePodEvictorKeys = append(zonePodEvictorKeys, k)
+        }
+    }()
+
+    for _, k := range zonePodEvictorKeys {
+        var zonePodEvictionWorker *scheduler.RateLimitedTimedQueue
+        func() {
+            nc.evictorLock.Lock()
+            defer nc.evictorLock.Unlock()
+            // Extracting the value without checking if the key
+            // exists or not is safe to do here since zones do
+            // not get removed, and consequently pod evictors for
+            // these zones also do not get removed, only added.
+            zonePodEvictionWorker = nc.zonePodEvictor[k]
+        }()
+
         // Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded).
-        nc.zonePodEvictor[k].Try(func(value scheduler.TimedValue) (bool, time.Duration) {
+        zonePodEvictionWorker.Try(func(value scheduler.TimedValue) (bool, time.Duration) {
             node, err := nc.nodeLister.Get(value.Value)
             if apierrors.IsNotFound(err) {
                 klog.Warningf("Node %v no longer present in nodeLister!", value.Value)
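Both passes re-take the lock just to read one map entry (the per-zone worker lookup) because even a lone map read is a data race against concurrent writers such as the addPodEvictorForNewZone path named in the next hunk. A runnable sketch, with illustrative zone names: remove the reader's locking below and `go run -race` reports the race.

    package main

    import "sync"

    func main() {
        var mu sync.Mutex
        zones := map[string]int{}
        var wg sync.WaitGroup
        wg.Add(2)

        go func() { // writer: zones get added over time
            defer wg.Done()
            mu.Lock()
            defer mu.Unlock()
            zones["zone-a"] = 1
        }()

        go func() { // reader: must take the same lock even for one lookup
            defer wg.Done()
            mu.Lock()
            defer mu.Unlock()
            _ = zones["zone-a"]
        }()

        wg.Wait()
    }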
@@ -1407,11 +1450,11 @@ func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) {
 // returns true if an eviction was queued.
 func (nc *Controller) cancelPodEviction(node *v1.Node) bool {
     zone := nodetopology.GetZoneKey(node)
-    nc.evictorLock.Lock()
-    defer nc.evictorLock.Unlock()
     if !nc.nodeEvictionMap.setStatus(node.Name, unmarked) {
         klog.V(2).Infof("node %v was unregistered in the meantime - skipping setting status", node.Name)
     }
+    nc.evictorLock.Lock()
+    defer nc.evictorLock.Unlock()
     wasDeleting := nc.zonePodEvictor[zone].Remove(node.Name)
     if wasDeleting {
         klog.V(2).Infof("Cancelling pod Eviction on Node: %v", node.Name)
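setStatus can now run before evictorLock is taken because nodeEvictionMap appears to synchronize itself internally; evictorLock is needed only for the zonePodEvictor update that follows. A hedged sketch of such a self-locking status map; the fields and semantics here are assumptions in the spirit of nodeEvictionMap, not its actual definition:

    package main

    import "sync"

    // Assumed shape of a self-synchronizing status map.
    type statusMap struct {
        lock     sync.Mutex
        statuses map[string]int
    }

    // setStatus updates a registered node and reports whether it existed,
    // mirroring the "was unregistered in the meantime" log above.
    func (m *statusMap) setStatus(name string, status int) bool {
        m.lock.Lock()
        defer m.lock.Unlock()
        if _, registered := m.statuses[name]; !registered {
            return false
        }
        m.statuses[name] = status
        return true
    }

    func main() {
        m := &statusMap{statuses: map[string]int{"node-1": 0}}
        _ = m.setStatus("node-1", 1) // true
        _ = m.setStatus("node-2", 1) // false: never registered
    }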
@@ -1426,8 +1469,6 @@ func (nc *Controller) cancelPodEviction(node *v1.Node) bool {
 // - deletes pods immediately if node is already marked as evicted.
 // Returns false, because the node wasn't added to the queue.
 func (nc *Controller) evictPods(ctx context.Context, node *v1.Node, pods []*v1.Pod) (bool, error) {
-    nc.evictorLock.Lock()
-    defer nc.evictorLock.Unlock()
     status, ok := nc.nodeEvictionMap.getStatus(node.Name)
     if ok && status == evicted {
         // Node eviction already happened for this node.
@@ -1441,6 +1482,10 @@ func (nc *Controller) evictPods(ctx context.Context, node *v1.Node, pods []*v1.P
     if !nc.nodeEvictionMap.setStatus(node.Name, toBeEvicted) {
         klog.V(2).Infof("node %v was unregistered in the meantime - skipping setting status", node.Name)
     }
+
+    nc.evictorLock.Lock()
+    defer nc.evictorLock.Unlock()
+
     return nc.zonePodEvictor[nodetopology.GetZoneKey(node)].Add(node.Name, string(node.UID)), nil
 }

@@ -1463,8 +1508,6 @@ func (nc *Controller) markNodeForTainting(node *v1.Node, status v1.ConditionStat
 }

 func (nc *Controller) markNodeAsReachable(ctx context.Context, node *v1.Node) (bool, error) {
-    nc.evictorLock.Lock()
-    defer nc.evictorLock.Unlock()
     err := controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, UnreachableTaintTemplate)
     if err != nil {
         klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err)
@@ -1475,6 +1518,9 @@ func (nc *Controller) markNodeAsReachable(ctx context.Context, node *v1.Node) (b
         klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err)
         return false, err
     }
+    nc.evictorLock.Lock()
+    defer nc.evictorLock.Unlock()
+
     return nc.zoneNoExecuteTainter[nodetopology.GetZoneKey(node)].Remove(node.Name), nil
 }

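markNodeAsReachable is the clearest instance of the commit's point: the RemoveTaintOffNode API round-trips now complete before evictorLock is taken, so the lock covers only the in-memory queue removal. A self-contained sketch of that ordering; reachCtl, removeTaint, and the queue field are hypothetical stand-ins:

    package main

    import "sync"

    type reachCtl struct {
        mu    sync.Mutex
        queue map[string]bool // guarded by mu
    }

    // removeTaint stands in for the RemoveTaintOffNode API helper.
    func (c *reachCtl) removeTaint(name string) error { return nil }

    func (c *reachCtl) markReachable(name string) (bool, error) {
        // API round-trips happen first, with no lock held: a slow
        // apiserver no longer serializes workers contending for c.mu.
        if err := c.removeTaint(name); err != nil {
            return false, err
        }
        // The lock covers only the in-memory queue removal.
        c.mu.Lock()
        defer c.mu.Unlock()
        wasQueued := c.queue[name]
        delete(c.queue, name)
        return wasQueued, nil
    }

    func main() {
        c := &reachCtl{queue: map[string]bool{"node-1": true}}
        _, _ = c.markReachable("node-1")
    }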