From b3637c967044460bcf766205e37563d3572d8ac5 Mon Sep 17 00:00:00 2001 From: kaiyuechen Date: Wed, 11 Mar 2020 23:34:53 +0800 Subject: [PATCH] Reconcile NoExecute Taint --- .../nodelifecycle/node_lifecycle_controller.go | 18 +++++++++++++++--- .../scheduler/rate_limited_queue.go | 14 ++++++++++++++ .../scheduler/rate_limited_queue_test.go | 11 +++++++++++ 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/pkg/controller/nodelifecycle/node_lifecycle_controller.go b/pkg/controller/nodelifecycle/node_lifecycle_controller.go index 8a0cabdede6..caef6b19a4a 100644 --- a/pkg/controller/nodelifecycle/node_lifecycle_controller.go +++ b/pkg/controller/nodelifecycle/node_lifecycle_controller.go @@ -872,7 +872,7 @@ func (nc *Controller) processTaintBaseEviction(node *v1.Node, observedReadyCondi if !nodeutil.SwapNodeControllerTaint(nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{UnreachableTaintTemplate}, node) { klog.Errorf("Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle.") } - } else if nc.markNodeForTainting(node) { + } else if nc.markNodeForTainting(node, v1.ConditionFalse) { klog.V(2).Infof("Node %v is NotReady as of %v. Adding it to the Taint queue.", node.Name, decisionTimestamp, @@ -885,7 +885,7 @@ func (nc *Controller) processTaintBaseEviction(node *v1.Node, observedReadyCondi if !nodeutil.SwapNodeControllerTaint(nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{NotReadyTaintTemplate}, node) { klog.Errorf("Failed to instantly swap NotReadyTaint to UnreachableTaint. Will try again in the next cycle.") } - } else if nc.markNodeForTainting(node) { + } else if nc.markNodeForTainting(node, v1.ConditionUnknown) { klog.V(2).Infof("Node %v is unresponsive as of %v. Adding it to the Taint queue.", node.Name, decisionTimestamp, @@ -1476,9 +1476,21 @@ func (nc *Controller) evictPods(node *v1.Node, pods []*v1.Pod) (bool, error) { return nc.zonePodEvictor[utilnode.GetZoneKey(node)].Add(node.Name, string(node.UID)), nil } -func (nc *Controller) markNodeForTainting(node *v1.Node) bool { +func (nc *Controller) markNodeForTainting(node *v1.Node, status v1.ConditionStatus) bool { nc.evictorLock.Lock() defer nc.evictorLock.Unlock() + if status == v1.ConditionFalse { + if !taintutils.TaintExists(node.Spec.Taints, NotReadyTaintTemplate) { + nc.zoneNoExecuteTainter[utilnode.GetZoneKey(node)].SetRemove(node.Name) + } + } + + if status == v1.ConditionUnknown { + if !taintutils.TaintExists(node.Spec.Taints, UnreachableTaintTemplate) { + nc.zoneNoExecuteTainter[utilnode.GetZoneKey(node)].SetRemove(node.Name) + } + } + return nc.zoneNoExecuteTainter[utilnode.GetZoneKey(node)].Add(node.Name, string(node.UID)) } diff --git a/pkg/controller/nodelifecycle/scheduler/rate_limited_queue.go b/pkg/controller/nodelifecycle/scheduler/rate_limited_queue.go index 03a1fcb889f..d79ee9ec04a 100644 --- a/pkg/controller/nodelifecycle/scheduler/rate_limited_queue.go +++ b/pkg/controller/nodelifecycle/scheduler/rate_limited_queue.go @@ -194,6 +194,15 @@ func (q *UniqueQueue) Clear() { } } +// SetRemove remove value from the set if value existed +func (q *UniqueQueue) SetRemove(value string) { + q.lock.Lock() + defer q.lock.Unlock() + if q.set.Has(value) { + q.set.Delete(value) + } +} + // RateLimitedTimedQueue is a unique item priority queue ordered by // the expected next time of execution. It is also rate limited. type RateLimitedTimedQueue struct { @@ -280,6 +289,11 @@ func (q *RateLimitedTimedQueue) Clear() { q.queue.Clear() } +// SetRemove remove value from the set of the queue +func (q *RateLimitedTimedQueue) SetRemove(value string) { + q.queue.SetRemove(value) +} + // SwapLimiter safely swaps current limiter for this queue with the // passed one if capacities or qps's differ. func (q *RateLimitedTimedQueue) SwapLimiter(newQPS float32) { diff --git a/pkg/controller/nodelifecycle/scheduler/rate_limited_queue_test.go b/pkg/controller/nodelifecycle/scheduler/rate_limited_queue_test.go index 644b6569039..00411792df8 100644 --- a/pkg/controller/nodelifecycle/scheduler/rate_limited_queue_test.go +++ b/pkg/controller/nodelifecycle/scheduler/rate_limited_queue_test.go @@ -282,6 +282,17 @@ func TestClear(t *testing.T) { } } +func TestSetRemove(t *testing.T) { + evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter()) + evictor.Add("first", "11111") + + evictor.SetRemove("first") + + if evictor.queue.set.Len() != 0 { + t.Fatalf("SetRemove should remove element from the set.") + } +} + func TestSwapLimiter(t *testing.T) { evictor := NewRateLimitedTimedQueue(flowcontrol.NewFakeAlwaysRateLimiter()) fakeAlways := flowcontrol.NewFakeAlwaysRateLimiter()