Wire contexts to Core controllers

Author: Mike Dame
Date: 2021-04-22 14:27:59 -04:00
parent 657412713b
commit 4960d0976a
61 changed files with 842 additions and 780 deletions

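The change repeated across these files: controller Run methods take a context.Context instead of a stop channel, ctx.Done() is passed wherever a <-chan struct{} is still expected (e.g. WaitForNamedCacheSync), and periodic workers move from wait.Until to wait.UntilWithContext so each pass receives the context. A minimal, self-contained sketch of that conversion follows; toyController and its worker are illustrative stand-ins, not code from this commit.

package main

import (
	"context"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

type toyController struct{}

// Before: Run(stopCh <-chan struct{}) blocked on <-stopCh and started
// workers with wait.Until(f, period, stopCh).
// After: Run(ctx context.Context) blocks on <-ctx.Done() and starts
// context-aware workers with wait.UntilWithContext.
func (c *toyController) Run(ctx context.Context) {
	go wait.UntilWithContext(ctx, c.worker, time.Second)
	<-ctx.Done() // replaces <-stopCh
}

func (c *toyController) worker(ctx context.Context) {
	// Workers can now observe cancellation directly instead of relying on
	// the enclosing loop being torn down.
	select {
	case <-ctx.Done():
		return
	default:
		fmt.Println("one processing pass")
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	(&toyController{}).Run(ctx)
}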
View File

@@ -349,6 +349,7 @@ type Controller struct {
// NewNodeLifecycleController returns a new taint controller.
func NewNodeLifecycleController(
ctx context.Context,
leaseInformer coordinformers.LeaseInformer,
podInformer coreinformers.PodInformer,
nodeInformer coreinformers.NodeInformer,
@@ -484,7 +485,7 @@ func NewNodeLifecycleController(
podGetter := func(name, namespace string) (*v1.Pod, error) { return nc.podLister.Pods(namespace).Get(name) }
nodeLister := nodeInformer.Lister()
nodeGetter := func(name string) (*v1.Node, error) { return nodeLister.Get(name) }
nc.taintManager = scheduler.NewNoExecuteTaintManager(kubeClient, podGetter, nodeGetter, nc.getPodsAssignedToNode)
nc.taintManager = scheduler.NewNoExecuteTaintManager(ctx, kubeClient, podGetter, nodeGetter, nc.getPodsAssignedToNode)
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: nodeutil.CreateAddNodeHandler(func(node *v1.Node) error {
nc.taintManager.NodeUpdated(nil, node)
@@ -532,18 +533,18 @@ func NewNodeLifecycleController(
}
// Run starts an asynchronous loop that monitors the status of cluster nodes.
func (nc *Controller) Run(stopCh <-chan struct{}) {
func (nc *Controller) Run(ctx context.Context) {
defer utilruntime.HandleCrash()
klog.Infof("Starting node controller")
defer klog.Infof("Shutting down node controller")
if !cache.WaitForNamedCacheSync("taint", stopCh, nc.leaseInformerSynced, nc.nodeInformerSynced, nc.podInformerSynced, nc.daemonSetInformerSynced) {
if !cache.WaitForNamedCacheSync("taint", ctx.Done(), nc.leaseInformerSynced, nc.nodeInformerSynced, nc.podInformerSynced, nc.daemonSetInformerSynced) {
return
}
if nc.runTaintManager {
go nc.taintManager.Run(stopCh)
go nc.taintManager.Run(ctx)
}
// Close node update queue to cleanup go routine.
@@ -556,35 +557,35 @@ func (nc *Controller) Run(stopCh <-chan struct{}) {
// the item is flagged when got from queue: if new event come, the new item will
// be re-queued until "Done", so no more than one worker handle the same item and
// no event missed.
go wait.Until(nc.doNodeProcessingPassWorker, time.Second, stopCh)
go wait.UntilWithContext(ctx, nc.doNodeProcessingPassWorker, time.Second)
}
for i := 0; i < podUpdateWorkerSize; i++ {
go wait.Until(nc.doPodProcessingWorker, time.Second, stopCh)
go wait.UntilWithContext(ctx, nc.doPodProcessingWorker, time.Second)
}
if nc.runTaintManager {
// Handling taint based evictions. Because we don't want a dedicated logic in TaintManager for NC-originated
// taints and we normally don't rate limit evictions caused by taints, we need to rate limit adding taints.
go wait.Until(nc.doNoExecuteTaintingPass, scheduler.NodeEvictionPeriod, stopCh)
go wait.UntilWithContext(ctx, nc.doNoExecuteTaintingPass, scheduler.NodeEvictionPeriod)
} else {
// Managing eviction of nodes:
// When we delete pods off a node, if the node was not empty at the time we then
// queue an eviction watcher. If we hit an error, retry deletion.
go wait.Until(nc.doEvictionPass, scheduler.NodeEvictionPeriod, stopCh)
go wait.UntilWithContext(ctx, nc.doEvictionPass, scheduler.NodeEvictionPeriod)
}
// Incorporate the results of node health signal pushed from kubelet to master.
go wait.Until(func() {
if err := nc.monitorNodeHealth(); err != nil {
go wait.UntilWithContext(ctx, func(ctx context.Context) {
if err := nc.monitorNodeHealth(ctx); err != nil {
klog.Errorf("Error monitoring node health: %v", err)
}
}, nc.nodeMonitorPeriod, stopCh)
}, nc.nodeMonitorPeriod)
<-stopCh
<-ctx.Done()
}
func (nc *Controller) doNodeProcessingPassWorker() {
func (nc *Controller) doNodeProcessingPassWorker(ctx context.Context) {
for {
obj, shutdown := nc.nodeUpdateQueue.Get()
// "nodeUpdateQueue" will be shutdown when "stopCh" closed;
@@ -593,7 +594,7 @@ func (nc *Controller) doNodeProcessingPassWorker() {
return
}
nodeName := obj.(string)
if err := nc.doNoScheduleTaintingPass(nodeName); err != nil {
if err := nc.doNoScheduleTaintingPass(ctx, nodeName); err != nil {
klog.Errorf("Failed to taint NoSchedule on node <%s>, requeue it: %v", nodeName, err)
// TODO(k82cn): Add nodeName back to the queue
}
@@ -607,7 +608,7 @@ func (nc *Controller) doNodeProcessingPassWorker() {
}
}
func (nc *Controller) doNoScheduleTaintingPass(nodeName string) error {
func (nc *Controller) doNoScheduleTaintingPass(ctx context.Context, nodeName string) error {
node, err := nc.nodeLister.Get(nodeName)
if err != nil {
// If node not found, just ignore it.
@@ -656,13 +657,13 @@ func (nc *Controller) doNoScheduleTaintingPass(nodeName string) error {
if len(taintsToAdd) == 0 && len(taintsToDel) == 0 {
return nil
}
if !nodeutil.SwapNodeControllerTaint(nc.kubeClient, taintsToAdd, taintsToDel, node) {
if !nodeutil.SwapNodeControllerTaint(ctx, nc.kubeClient, taintsToAdd, taintsToDel, node) {
return fmt.Errorf("failed to swap taints of node %+v", node)
}
return nil
}
func (nc *Controller) doNoExecuteTaintingPass() {
func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
for k := range nc.zoneNoExecuteTainter {
@@ -694,7 +695,7 @@ func (nc *Controller) doNoExecuteTaintingPass() {
return true, 0
}
result := nodeutil.SwapNodeControllerTaint(nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{&oppositeTaint}, node)
result := nodeutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{&oppositeTaint}, node)
if result {
//count the evictionsNumber
zone := utilnode.GetZoneKey(node)
@@ -706,7 +707,7 @@ func (nc *Controller) doNoExecuteTaintingPass() {
}
}
func (nc *Controller) doEvictionPass() {
func (nc *Controller) doEvictionPass(ctx context.Context) {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
for k := range nc.zonePodEvictor {
@@ -724,7 +725,7 @@ func (nc *Controller) doEvictionPass() {
utilruntime.HandleError(fmt.Errorf("unable to list pods from node %q: %v", value.Value, err))
return false, 0
}
remaining, err := nodeutil.DeletePods(nc.kubeClient, pods, nc.recorder, value.Value, nodeUID, nc.daemonSetStore)
remaining, err := nodeutil.DeletePods(ctx, nc.kubeClient, pods, nc.recorder, value.Value, nodeUID, nc.daemonSetStore)
if err != nil {
// We are not setting eviction status here.
// New pods will be handled by zonePodEvictor retry
@@ -752,7 +753,7 @@ func (nc *Controller) doEvictionPass() {
// monitorNodeHealth verifies node health are constantly updated by kubelet, and
// if not, post "NodeReady==ConditionUnknown".
// This function will taint nodes who are not ready or not reachable for a long period of time.
func (nc *Controller) monitorNodeHealth() error {
func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
// We are listing nodes from local cache as we can tolerate some small delays
// comparing to state from etcd and there is eventual consistency anyway.
nodes, err := nc.nodeLister.List(labels.Everything())
@@ -771,7 +772,7 @@ func (nc *Controller) monitorNodeHealth() error {
nc.knownNodeSet[added[i].Name] = added[i]
nc.addPodEvictorForNewZone(added[i])
if nc.runTaintManager {
nc.markNodeAsReachable(added[i])
nc.markNodeAsReachable(ctx, added[i])
} else {
nc.cancelPodEviction(added[i])
}
@@ -790,12 +791,12 @@ func (nc *Controller) monitorNodeHealth() error {
var currentReadyCondition *v1.NodeCondition
node := nodes[i].DeepCopy()
if err := wait.PollImmediate(retrySleepTime, retrySleepTime*scheduler.NodeHealthUpdateRetry, func() (bool, error) {
gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeHealth(node)
gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeHealth(ctx, node)
if err == nil {
return true, nil
}
name := node.Name
node, err = nc.kubeClient.CoreV1().Nodes().Get(context.TODO(), name, metav1.GetOptions{})
node, err = nc.kubeClient.CoreV1().Nodes().Get(ctx, name, metav1.GetOptions{})
if err != nil {
klog.Errorf("Failed while getting a Node to retry updating node health. Probably Node %s was deleted.", name)
return false, err
@@ -825,9 +826,9 @@ func (nc *Controller) monitorNodeHealth() error {
continue
}
if nc.runTaintManager {
nc.processTaintBaseEviction(node, &observedReadyCondition)
nc.processTaintBaseEviction(ctx, node, &observedReadyCondition)
} else {
if err := nc.processNoTaintBaseEviction(node, &observedReadyCondition, gracePeriod, pods); err != nil {
if err := nc.processNoTaintBaseEviction(ctx, node, &observedReadyCondition, gracePeriod, pods); err != nil {
utilruntime.HandleError(fmt.Errorf("unable to evict all pods from node %v: %v; queuing for retry", node.Name, err))
}
}
@@ -839,7 +840,7 @@ func (nc *Controller) monitorNodeHealth() error {
nodeutil.RecordNodeStatusChange(nc.recorder, node, "NodeNotReady")
fallthrough
case needsRetry && observedReadyCondition.Status != v1.ConditionTrue:
if err = nodeutil.MarkPodsNotReady(nc.kubeClient, nc.recorder, pods, node.Name); err != nil {
if err = nodeutil.MarkPodsNotReady(ctx, nc.kubeClient, nc.recorder, pods, node.Name); err != nil {
utilruntime.HandleError(fmt.Errorf("unable to mark all pods NotReady on node %v: %v; queuing for retry", node.Name, err))
nc.nodesToRetry.Store(node.Name, struct{}{})
continue
@@ -848,12 +849,12 @@ func (nc *Controller) monitorNodeHealth() error {
}
nc.nodesToRetry.Delete(node.Name)
}
nc.handleDisruption(zoneToNodeConditions, nodes)
nc.handleDisruption(ctx, zoneToNodeConditions, nodes)
return nil
}
func (nc *Controller) processTaintBaseEviction(node *v1.Node, observedReadyCondition *v1.NodeCondition) {
func (nc *Controller) processTaintBaseEviction(ctx context.Context, node *v1.Node, observedReadyCondition *v1.NodeCondition) {
decisionTimestamp := nc.now()
// Check eviction timeout against decisionTimestamp
switch observedReadyCondition.Status {
@@ -861,7 +862,7 @@ func (nc *Controller) processTaintBaseEviction(node *v1.Node, observedReadyCondi
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
if taintutils.TaintExists(node.Spec.Taints, UnreachableTaintTemplate) {
taintToAdd := *NotReadyTaintTemplate
if !nodeutil.SwapNodeControllerTaint(nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{UnreachableTaintTemplate}, node) {
if !nodeutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{UnreachableTaintTemplate}, node) {
klog.Errorf("Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle.")
}
} else if nc.markNodeForTainting(node, v1.ConditionFalse) {
@@ -874,7 +875,7 @@ func (nc *Controller) processTaintBaseEviction(node *v1.Node, observedReadyCondi
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
if taintutils.TaintExists(node.Spec.Taints, NotReadyTaintTemplate) {
taintToAdd := *UnreachableTaintTemplate
if !nodeutil.SwapNodeControllerTaint(nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{NotReadyTaintTemplate}, node) {
if !nodeutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{NotReadyTaintTemplate}, node) {
klog.Errorf("Failed to instantly swap NotReadyTaint to UnreachableTaint. Will try again in the next cycle.")
}
} else if nc.markNodeForTainting(node, v1.ConditionUnknown) {
@@ -884,7 +885,7 @@ func (nc *Controller) processTaintBaseEviction(node *v1.Node, observedReadyCondi
)
}
case v1.ConditionTrue:
removed, err := nc.markNodeAsReachable(node)
removed, err := nc.markNodeAsReachable(ctx, node)
if err != nil {
klog.Errorf("Failed to remove taints from node %v. Will retry in next iteration.", node.Name)
}
@@ -894,7 +895,7 @@ func (nc *Controller) processTaintBaseEviction(node *v1.Node, observedReadyCondi
}
}
func (nc *Controller) processNoTaintBaseEviction(node *v1.Node, observedReadyCondition *v1.NodeCondition, gracePeriod time.Duration, pods []*v1.Pod) error {
func (nc *Controller) processNoTaintBaseEviction(ctx context.Context, node *v1.Node, observedReadyCondition *v1.NodeCondition, gracePeriod time.Duration, pods []*v1.Pod) error {
decisionTimestamp := nc.now()
nodeHealthData := nc.nodeHealthMap.getDeepCopy(node.Name)
if nodeHealthData == nil {
@@ -904,7 +905,7 @@ func (nc *Controller) processNoTaintBaseEviction(node *v1.Node, observedReadyCon
switch observedReadyCondition.Status {
case v1.ConditionFalse:
if decisionTimestamp.After(nodeHealthData.readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
enqueued, err := nc.evictPods(node, pods)
enqueued, err := nc.evictPods(ctx, node, pods)
if err != nil {
return err
}
@@ -919,7 +920,7 @@ func (nc *Controller) processNoTaintBaseEviction(node *v1.Node, observedReadyCon
}
case v1.ConditionUnknown:
if decisionTimestamp.After(nodeHealthData.probeTimestamp.Add(nc.podEvictionTimeout)) {
enqueued, err := nc.evictPods(node, pods)
enqueued, err := nc.evictPods(ctx, node, pods)
if err != nil {
return err
}
@@ -953,7 +954,7 @@ func isNodeExcludedFromDisruptionChecks(node *v1.Node) bool {
// tryUpdateNodeHealth checks a given node's conditions and tries to update it. Returns grace period to
// which given node is entitled, state of current and last observed Ready Condition, and an error if it occurred.
func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.NodeCondition, *v1.NodeCondition, error) {
func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (time.Duration, v1.NodeCondition, *v1.NodeCondition, error) {
nodeHealth := nc.nodeHealthMap.getDeepCopy(node.Name)
defer func() {
nc.nodeHealthMap.set(node.Name, nodeHealth)
@@ -1102,7 +1103,7 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
_, currentReadyCondition = nodeutil.GetNodeCondition(&node.Status, v1.NodeReady)
if !apiequality.Semantic.DeepEqual(currentReadyCondition, &observedReadyCondition) {
if _, err := nc.kubeClient.CoreV1().Nodes().UpdateStatus(context.TODO(), node, metav1.UpdateOptions{}); err != nil {
if _, err := nc.kubeClient.CoreV1().Nodes().UpdateStatus(ctx, node, metav1.UpdateOptions{}); err != nil {
klog.Errorf("Error updating node %s: %v", node.Name, err)
return gracePeriod, observedReadyCondition, currentReadyCondition, err
}
@@ -1119,7 +1120,7 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
return gracePeriod, observedReadyCondition, currentReadyCondition, nil
}
func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.NodeCondition, nodes []*v1.Node) {
func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions map[string][]*v1.NodeCondition, nodes []*v1.Node) {
newZoneStates := map[string]ZoneState{}
allAreFullyDisrupted := true
for k, v := range zoneToNodeConditions {
@@ -1163,7 +1164,7 @@ func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.Nod
klog.V(0).Info("Controller detected that all Nodes are not-Ready. Entering master disruption mode.")
for i := range nodes {
if nc.runTaintManager {
_, err := nc.markNodeAsReachable(nodes[i])
_, err := nc.markNodeAsReachable(ctx, nodes[i])
if err != nil {
klog.Errorf("Failed to remove taints from Node %v", nodes[i].Name)
}
@@ -1227,7 +1228,7 @@ func (nc *Controller) podUpdated(oldPod, newPod *v1.Pod) {
}
}
func (nc *Controller) doPodProcessingWorker() {
func (nc *Controller) doPodProcessingWorker(ctx context.Context) {
for {
obj, shutdown := nc.podUpdateQueue.Get()
// "podUpdateQueue" will be shutdown when "stopCh" closed;
@@ -1237,7 +1238,7 @@ func (nc *Controller) doPodProcessingWorker() {
}
podItem := obj.(podUpdateItem)
nc.processPod(podItem)
nc.processPod(ctx, podItem)
}
}
@@ -1245,7 +1246,7 @@ func (nc *Controller) doPodProcessingWorker() {
// 1. for NodeReady=true node, taint eviction for this pod will be cancelled
// 2. for NodeReady=false or unknown node, taint eviction of pod will happen and pod will be marked as not ready
// 3. if node doesn't exist in cache, it will be skipped and handled later by doEvictionPass
func (nc *Controller) processPod(podItem podUpdateItem) {
func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
defer nc.podUpdateQueue.Done(podItem)
pod, err := nc.podLister.Pods(podItem.namespace).Get(podItem.name)
if err != nil {
@@ -1286,7 +1287,7 @@ func (nc *Controller) processPod(podItem podUpdateItem) {
// In taint-based eviction mode, only node updates are processed by NodeLifecycleController.
// Pods are processed by TaintManager.
if !nc.runTaintManager {
if err := nc.processNoTaintBaseEviction(node, currentReadyCondition, nc.nodeMonitorGracePeriod, pods); err != nil {
if err := nc.processNoTaintBaseEviction(ctx, node, currentReadyCondition, nc.nodeMonitorGracePeriod, pods); err != nil {
klog.Warningf("Unable to process pod %+v eviction from node %v: %v.", podItem, nodeName, err)
nc.podUpdateQueue.AddRateLimited(podItem)
return
@@ -1294,7 +1295,7 @@ func (nc *Controller) processPod(podItem podUpdateItem) {
}
if currentReadyCondition.Status != v1.ConditionTrue {
if err := nodeutil.MarkPodsNotReady(nc.kubeClient, nc.recorder, pods, nodeName); err != nil {
if err := nodeutil.MarkPodsNotReady(ctx, nc.kubeClient, nc.recorder, pods, nodeName); err != nil {
klog.Warningf("Unable to mark pod %+v NotReady on node %v: %v.", podItem, nodeName, err)
nc.podUpdateQueue.AddRateLimited(podItem)
}
@@ -1421,14 +1422,14 @@ func (nc *Controller) cancelPodEviction(node *v1.Node) bool {
// Returns false if the node name was already enqueued.
// - deletes pods immediately if node is already marked as evicted.
// Returns false, because the node wasn't added to the queue.
func (nc *Controller) evictPods(node *v1.Node, pods []*v1.Pod) (bool, error) {
func (nc *Controller) evictPods(ctx context.Context, node *v1.Node, pods []*v1.Pod) (bool, error) {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
status, ok := nc.nodeEvictionMap.getStatus(node.Name)
if ok && status == evicted {
// Node eviction already happened for this node.
// Handling immediate pod deletion.
_, err := nodeutil.DeletePods(nc.kubeClient, pods, nc.recorder, node.Name, string(node.UID), nc.daemonSetStore)
_, err := nodeutil.DeletePods(ctx, nc.kubeClient, pods, nc.recorder, node.Name, string(node.UID), nc.daemonSetStore)
if err != nil {
return false, fmt.Errorf("unable to delete pods from node %q: %v", node.Name, err)
}
@@ -1458,15 +1459,15 @@ func (nc *Controller) markNodeForTainting(node *v1.Node, status v1.ConditionStat
return nc.zoneNoExecuteTainter[utilnode.GetZoneKey(node)].Add(node.Name, string(node.UID))
}
func (nc *Controller) markNodeAsReachable(node *v1.Node) (bool, error) {
func (nc *Controller) markNodeAsReachable(ctx context.Context, node *v1.Node) (bool, error) {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
err := controller.RemoveTaintOffNode(nc.kubeClient, node.Name, node, UnreachableTaintTemplate)
err := controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, UnreachableTaintTemplate)
if err != nil {
klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err)
return false, err
}
err = controller.RemoveTaintOffNode(nc.kubeClient, node.Name, node, NotReadyTaintTemplate)
err = controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, NotReadyTaintTemplate)
if err != nil {
klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err)
return false, err

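Within the controller, the propagated ctx also replaces context.TODO() at client-go call sites such as Nodes().Get and Nodes().UpdateStatus above, so API requests are cancelled together with the controller. A small sketch of that call-site shape against a fake clientset; getNode is a hypothetical helper, not a function from this commit.

package main

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/fake"
)

// getNode accepts the caller's ctx instead of minting context.TODO() itself.
func getNode(ctx context.Context, client kubernetes.Interface, name string) (*v1.Node, error) {
	return client.CoreV1().Nodes().Get(ctx, name, metav1.GetOptions{})
}

func main() {
	client := fake.NewSimpleClientset(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node0"}})
	node, err := getNode(context.Background(), client, "node0")
	if err != nil {
		panic(err)
	}
	fmt.Println(node.Name)
}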
View File

@@ -95,7 +95,7 @@ func (nc *nodeLifecycleController) doEviction(fakeNodeHandler *testutil.FakeNode
nc.zonePodEvictor[zone].Try(func(value scheduler.TimedValue) (bool, time.Duration) {
uid, _ := value.UID.(string)
pods, _ := nc.getPodsAssignedToNode(value.Value)
nodeutil.DeletePods(fakeNodeHandler, pods, nc.recorder, value.Value, uid, nc.daemonSetStore)
nodeutil.DeletePods(context.TODO(), fakeNodeHandler, pods, nc.recorder, value.Value, uid, nc.daemonSetStore)
_ = nc.nodeEvictionMap.setStatus(value.Value, evicted)
return true, 0
})
@@ -144,6 +144,7 @@ func (nc *nodeLifecycleController) syncNodeStore(fakeNodeHandler *testutil.FakeN
}
func newNodeLifecycleControllerFromClient(
ctx context.Context,
kubeClient clientset.Interface,
podEvictionTimeout time.Duration,
evictionLimiterQPS float32,
@@ -163,6 +164,7 @@ func newNodeLifecycleControllerFromClient(
daemonSetInformer := factory.Apps().V1().DaemonSets()
nc, err := NewNodeLifecycleController(
ctx,
leaseInformer,
factory.Core().V1().Pods(),
nodeInformer,
@@ -679,6 +681,7 @@ func TestMonitorNodeHealthEvictPods(t *testing.T) {
for _, item := range table {
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
item.fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS,
@@ -698,7 +701,7 @@ func TestMonitorNodeHealthEvictPods(t *testing.T) {
if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("unexpected error: %v", err)
}
if item.timeToPass > 0 {
@@ -713,7 +716,7 @@ func TestMonitorNodeHealthEvictPods(t *testing.T) {
if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("unexpected error: %v", err)
}
zones := testutil.GetZones(item.fakeNodeHandler)
@@ -726,7 +729,7 @@ func TestMonitorNodeHealthEvictPods(t *testing.T) {
t.Errorf("unexpected error: %v", err)
}
t.Logf("listed pods %d for node %v", len(pods), value.Value)
nodeutil.DeletePods(item.fakeNodeHandler, pods, nodeController.recorder, value.Value, nodeUID, nodeController.daemonSetInformer.Lister())
nodeutil.DeletePods(context.TODO(), item.fakeNodeHandler, pods, nodeController.recorder, value.Value, nodeUID, nodeController.daemonSetInformer.Lister())
return true, 0
})
} else {
@@ -847,6 +850,7 @@ func TestPodStatusChange(t *testing.T) {
for _, item := range table {
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
item.fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS,
@@ -863,7 +867,7 @@ func TestPodStatusChange(t *testing.T) {
if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("unexpected error: %v", err)
}
if item.timeToPass > 0 {
@@ -874,7 +878,7 @@ func TestPodStatusChange(t *testing.T) {
if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("unexpected error: %v", err)
}
zones := testutil.GetZones(item.fakeNodeHandler)
@@ -885,7 +889,7 @@ func TestPodStatusChange(t *testing.T) {
if err != nil {
t.Errorf("unexpected error: %v", err)
}
nodeutil.DeletePods(item.fakeNodeHandler, pods, nodeController.recorder, value.Value, nodeUID, nodeController.daemonSetStore)
nodeutil.DeletePods(context.TODO(), item.fakeNodeHandler, pods, nodeController.recorder, value.Value, nodeUID, nodeController.daemonSetStore)
return true, 0
})
}
@@ -1408,6 +1412,7 @@ func TestMonitorNodeHealthEvictPodsWithDisruption(t *testing.T) {
Clientset: fake.NewSimpleClientset(&v1.PodList{Items: item.podList}),
}
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS,
@@ -1430,7 +1435,7 @@ func TestMonitorNodeHealthEvictPodsWithDisruption(t *testing.T) {
if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("%v: unexpected error: %v", item.description, err)
}
@@ -1448,7 +1453,7 @@ func TestMonitorNodeHealthEvictPodsWithDisruption(t *testing.T) {
if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("%v: unexpected error: %v", item.description, err)
}
for zone, state := range item.expectedFollowingStates {
@@ -1694,6 +1699,7 @@ func TestMonitorNodeHealthUpdateStatus(t *testing.T) {
}
for i, item := range table {
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
item.fakeNodeHandler,
5*time.Minute,
testRateLimiterQPS,
@@ -1710,7 +1716,7 @@ func TestMonitorNodeHealthUpdateStatus(t *testing.T) {
if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("unexpected error: %v", err)
}
if item.timeToPass > 0 {
@@ -1719,7 +1725,7 @@ func TestMonitorNodeHealthUpdateStatus(t *testing.T) {
if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("unexpected error: %v", err)
}
}
@@ -2237,6 +2243,7 @@ func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) {
for _, item := range testcases {
t.Run(item.description, func(t *testing.T) {
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
item.fakeNodeHandler,
5*time.Minute,
testRateLimiterQPS,
@@ -2256,7 +2263,7 @@ func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) {
if err := nodeController.syncLeaseStore(item.lease); err != nil {
t.Fatalf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Fatalf("unexpected error: %v", err)
}
if item.timeToPass > 0 {
@@ -2268,7 +2275,7 @@ func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) {
if err := nodeController.syncLeaseStore(item.newLease); err != nil {
t.Fatalf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Fatalf("unexpected error: %v", err)
}
}
@@ -2401,6 +2408,7 @@ func TestMonitorNodeHealthMarkPodsNotReady(t *testing.T) {
for i, item := range table {
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
item.fakeNodeHandler,
5*time.Minute,
testRateLimiterQPS,
@@ -2417,7 +2425,7 @@ func TestMonitorNodeHealthMarkPodsNotReady(t *testing.T) {
if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("Case[%d] unexpected error: %v", i, err)
}
if item.timeToPass > 0 {
@@ -2426,7 +2434,7 @@ func TestMonitorNodeHealthMarkPodsNotReady(t *testing.T) {
if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("Case[%d] unexpected error: %v", i, err)
}
}
@@ -2584,6 +2592,7 @@ func TestMonitorNodeHealthMarkPodsNotReadyRetry(t *testing.T) {
for _, item := range table {
t.Run(item.desc, func(t *testing.T) {
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
item.fakeNodeHandler,
5*time.Minute,
testRateLimiterQPS,
@@ -2606,7 +2615,7 @@ func TestMonitorNodeHealthMarkPodsNotReadyRetry(t *testing.T) {
if err := nodeController.syncNodeStore(item.fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("unexpected error: %v", err)
}
}
@@ -2718,6 +2727,7 @@ func TestApplyNoExecuteTaints(t *testing.T) {
}
originalTaint := UnreachableTaintTemplate
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS,
@@ -2734,10 +2744,10 @@ func TestApplyNoExecuteTaints(t *testing.T) {
if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("unexpected error: %v", err)
}
nodeController.doNoExecuteTaintingPass()
nodeController.doNoExecuteTaintingPass(context.TODO())
node0, err := fakeNodeHandler.Get(context.TODO(), "node0", metav1.GetOptions{})
if err != nil {
t.Errorf("Can't get current node0...")
@@ -2765,10 +2775,10 @@ func TestApplyNoExecuteTaints(t *testing.T) {
if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("unexpected error: %v", err)
}
nodeController.doNoExecuteTaintingPass()
nodeController.doNoExecuteTaintingPass(context.TODO())
node2, err = fakeNodeHandler.Get(context.TODO(), "node2", metav1.GetOptions{})
if err != nil {
@@ -2872,6 +2882,7 @@ func TestApplyNoExecuteTaintsToNodesEnqueueTwice(t *testing.T) {
},
}
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS,
@@ -2889,10 +2900,10 @@ func TestApplyNoExecuteTaintsToNodesEnqueueTwice(t *testing.T) {
t.Errorf("unexpected error: %v", err)
}
// 1. monitor node health twice, add untainted node once
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("unexpected error: %v", err)
}
@@ -2986,14 +2997,14 @@ func TestApplyNoExecuteTaintsToNodesEnqueueTwice(t *testing.T) {
t.Errorf("unexpected error: %v", err)
}
// 3. start monitor node health again, add untainted node twice, construct UniqueQueue with duplicated node cache
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("unexpected error: %v", err)
}
// 4. do NoExecute taint pass
// when processing with node0, condition.Status is NodeReady, and return true with default case
// then remove the set value and queue value both, the taint job never stuck
nodeController.doNoExecuteTaintingPass()
nodeController.doNoExecuteTaintingPass(context.TODO())
// 5. get node3 and node5, see if it has ready got NoExecute taint
node3, err := fakeNodeHandler.Get(context.TODO(), "node3", metav1.GetOptions{})
@@ -3096,6 +3107,7 @@ func TestSwapUnreachableNotReadyTaints(t *testing.T) {
updatedTaint := NotReadyTaintTemplate
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS,
@@ -3112,10 +3124,10 @@ func TestSwapUnreachableNotReadyTaints(t *testing.T) {
if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("unexpected error: %v", err)
}
nodeController.doNoExecuteTaintingPass()
nodeController.doNoExecuteTaintingPass(context.TODO())
node0, err := fakeNodeHandler.Get(context.TODO(), "node0", metav1.GetOptions{})
if err != nil {
@@ -3150,10 +3162,10 @@ func TestSwapUnreachableNotReadyTaints(t *testing.T) {
if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("unexpected error: %v", err)
}
nodeController.doNoExecuteTaintingPass()
nodeController.doNoExecuteTaintingPass(context.TODO())
node0, err = fakeNodeHandler.Get(context.TODO(), "node0", metav1.GetOptions{})
if err != nil {
@@ -3200,6 +3212,7 @@ func TestTaintsNodeByCondition(t *testing.T) {
}
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS,
@@ -3355,7 +3368,7 @@ func TestTaintsNodeByCondition(t *testing.T) {
if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
nodeController.doNoScheduleTaintingPass(test.Node.Name)
nodeController.doNoScheduleTaintingPass(context.TODO(), test.Node.Name)
if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
@@ -3402,6 +3415,7 @@ func TestNodeEventGeneration(t *testing.T) {
}
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
fakeNodeHandler,
5*time.Minute,
testRateLimiterQPS,
@@ -3420,7 +3434,7 @@ func TestNodeEventGeneration(t *testing.T) {
if err := nodeController.syncNodeStore(fakeNodeHandler); err != nil {
t.Errorf("unexpected error: %v", err)
}
if err := nodeController.monitorNodeHealth(); err != nil {
if err := nodeController.monitorNodeHealth(context.TODO()); err != nil {
t.Errorf("unexpected error: %v", err)
}
if len(fakeRecorder.Events) != 1 {
@@ -3475,6 +3489,7 @@ func TestReconcileNodeLabels(t *testing.T) {
}
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS,
@@ -3618,6 +3633,7 @@ func TestTryUpdateNodeHealth(t *testing.T) {
}
nodeController, _ := newNodeLifecycleControllerFromClient(
context.TODO(),
fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS,
@@ -3790,7 +3806,7 @@ func TestTryUpdateNodeHealth(t *testing.T) {
probeTimestamp: test.node.CreationTimestamp,
readyTransitionTimestamp: test.node.CreationTimestamp,
})
_, _, currentReadyCondition, err := nodeController.tryUpdateNodeHealth(test.node)
_, _, currentReadyCondition, err := nodeController.tryUpdateNodeHealth(context.TODO(), test.node)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}

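On the test side, the controller tests above thread context.TODO() into the constructor and into methods that gained a context parameter, such as monitorNodeHealth. A minimal sketch of that shape; monitorOnce is a stand-in, not a real controller method.

package example

import (
	"context"
	"testing"
)

// monitorOnce stands in for a controller method that now requires a context.
func monitorOnce(ctx context.Context) error { return ctx.Err() }

func TestMonitorOnce(t *testing.T) {
	// Call sites that previously took no argument now pass context.TODO().
	if err := monitorOnce(context.TODO()); err != nil {
		t.Errorf("unexpected error: %v", err)
	}
}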
View File

@@ -99,8 +99,8 @@ type NoExecuteTaintManager struct {
podUpdateQueue workqueue.Interface
}
func deletePodHandler(c clientset.Interface, emitEventFunc func(types.NamespacedName)) func(args *WorkArgs) error {
return func(args *WorkArgs) error {
func deletePodHandler(c clientset.Interface, emitEventFunc func(types.NamespacedName)) func(ctx context.Context, args *WorkArgs) error {
return func(ctx context.Context, args *WorkArgs) error {
ns := args.NamespacedName.Namespace
name := args.NamespacedName.Name
klog.V(0).InfoS("NoExecuteTaintManager is deleting pod", "pod", args.NamespacedName.String())
@@ -109,7 +109,7 @@ func deletePodHandler(c clientset.Interface, emitEventFunc func(types.Namespaced
}
var err error
for i := 0; i < retries; i++ {
err = c.CoreV1().Pods(ns).Delete(context.TODO(), name, metav1.DeleteOptions{})
err = c.CoreV1().Pods(ns).Delete(ctx, name, metav1.DeleteOptions{})
if err == nil {
break
}
@@ -155,7 +155,7 @@ func getMinTolerationTime(tolerations []v1.Toleration) time.Duration {
// NewNoExecuteTaintManager creates a new NoExecuteTaintManager that will use passed clientset to
// communicate with the API server.
func NewNoExecuteTaintManager(c clientset.Interface, getPod GetPodFunc, getNode GetNodeFunc, getPodsAssignedToNode GetPodsByNodeNameFunc) *NoExecuteTaintManager {
func NewNoExecuteTaintManager(ctx context.Context, c clientset.Interface, getPod GetPodFunc, getNode GetNodeFunc, getPodsAssignedToNode GetPodsByNodeNameFunc) *NoExecuteTaintManager {
eventBroadcaster := record.NewBroadcaster()
recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "taint-controller"})
eventBroadcaster.StartStructuredLogging(0)
@@ -183,7 +183,7 @@ func NewNoExecuteTaintManager(c clientset.Interface, getPod GetPodFunc, getNode
}
// Run starts NoExecuteTaintManager which will run in loop until `stopCh` is closed.
func (tc *NoExecuteTaintManager) Run(stopCh <-chan struct{}) {
func (tc *NoExecuteTaintManager) Run(ctx context.Context) {
klog.V(0).InfoS("Starting NoExecuteTaintManager")
for i := 0; i < UpdateWorkerSize; i++ {
@@ -209,7 +209,7 @@ func (tc *NoExecuteTaintManager) Run(stopCh <-chan struct{}) {
// tc.nodeUpdateQueue.Done is called by the nodeUpdateChannels worker
}
}
}(stopCh)
}(ctx.Done())
go func(stopCh <-chan struct{}) {
for {
@@ -231,17 +231,17 @@ func (tc *NoExecuteTaintManager) Run(stopCh <-chan struct{}) {
// tc.podUpdateQueue.Done is called by the podUpdateChannels worker
}
}
}(stopCh)
}(ctx.Done())
wg := sync.WaitGroup{}
wg.Add(UpdateWorkerSize)
for i := 0; i < UpdateWorkerSize; i++ {
go tc.worker(i, wg.Done, stopCh)
go tc.worker(ctx, i, wg.Done, ctx.Done())
}
wg.Wait()
}
func (tc *NoExecuteTaintManager) worker(worker int, done func(), stopCh <-chan struct{}) {
func (tc *NoExecuteTaintManager) worker(ctx context.Context, worker int, done func(), stopCh <-chan struct{}) {
defer done()
// When processing events we want to prioritize Node updates over Pod updates,
@@ -253,7 +253,7 @@ func (tc *NoExecuteTaintManager) worker(worker int, done func(), stopCh <-chan s
case <-stopCh:
return
case nodeUpdate := <-tc.nodeUpdateChannels[worker]:
tc.handleNodeUpdate(nodeUpdate)
tc.handleNodeUpdate(ctx, nodeUpdate)
tc.nodeUpdateQueue.Done(nodeUpdate)
case podUpdate := <-tc.podUpdateChannels[worker]:
// If we found a Pod update we need to empty Node queue first.
@@ -261,14 +261,14 @@ func (tc *NoExecuteTaintManager) worker(worker int, done func(), stopCh <-chan s
for {
select {
case nodeUpdate := <-tc.nodeUpdateChannels[worker]:
tc.handleNodeUpdate(nodeUpdate)
tc.handleNodeUpdate(ctx, nodeUpdate)
tc.nodeUpdateQueue.Done(nodeUpdate)
default:
break priority
}
}
// After Node queue is emptied we process podUpdate.
tc.handlePodUpdate(podUpdate)
tc.handlePodUpdate(ctx, podUpdate)
tc.podUpdateQueue.Done(podUpdate)
}
}
@@ -338,6 +338,7 @@ func (tc *NoExecuteTaintManager) cancelWorkWithEvent(nsName types.NamespacedName
}
func (tc *NoExecuteTaintManager) processPodOnNode(
ctx context.Context,
podNamespacedName types.NamespacedName,
nodeName string,
tolerations []v1.Toleration,
@@ -352,7 +353,7 @@ func (tc *NoExecuteTaintManager) processPodOnNode(
klog.V(2).InfoS("Not all taints are tolerated after update for pod on node", "pod", podNamespacedName.String(), "node", nodeName)
// We're canceling scheduled work (if any), as we're going to delete the Pod right away.
tc.cancelWorkWithEvent(podNamespacedName)
tc.taintEvictionQueue.AddWork(NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), time.Now(), time.Now())
tc.taintEvictionQueue.AddWork(ctx, NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), time.Now(), time.Now())
return
}
minTolerationTime := getMinTolerationTime(usedTolerations)
@@ -373,10 +374,10 @@ func (tc *NoExecuteTaintManager) processPodOnNode(
}
tc.cancelWorkWithEvent(podNamespacedName)
}
tc.taintEvictionQueue.AddWork(NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), startTime, triggerTime)
tc.taintEvictionQueue.AddWork(ctx, NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), startTime, triggerTime)
}
func (tc *NoExecuteTaintManager) handlePodUpdate(podUpdate podUpdateItem) {
func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate podUpdateItem) {
pod, err := tc.getPod(podUpdate.podName, podUpdate.podNamespace)
if err != nil {
if apierrors.IsNotFound(err) {
@@ -413,10 +414,10 @@ func (tc *NoExecuteTaintManager) handlePodUpdate(podUpdate podUpdateItem) {
if !ok {
return
}
tc.processPodOnNode(podNamespacedName, nodeName, pod.Spec.Tolerations, taints, time.Now())
tc.processPodOnNode(ctx, podNamespacedName, nodeName, pod.Spec.Tolerations, taints, time.Now())
}
func (tc *NoExecuteTaintManager) handleNodeUpdate(nodeUpdate nodeUpdateItem) {
func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdate nodeUpdateItem) {
node, err := tc.getNode(nodeUpdate.nodeName)
if err != nil {
if apierrors.IsNotFound(err) {
@@ -468,7 +469,7 @@ func (tc *NoExecuteTaintManager) handleNodeUpdate(nodeUpdate nodeUpdateItem) {
now := time.Now()
for _, pod := range pods {
podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
tc.processPodOnNode(podNamespacedName, node.Name, pod.Spec.Tolerations, taints, now)
tc.processPodOnNode(ctx, podNamespacedName, node.Name, pod.Spec.Tolerations, taints, now)
}
}

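In the taint manager, deletePodHandler now returns a func(ctx, args) and uses that ctx for the Pods().Delete call, while Run bridges the new signature to the existing channel-based select loops via ctx.Done(). A reduced sketch of the handler half against a fake clientset; newDeleteHandler is illustrative and omits the real handler's retries and event emission.

package main

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/fake"
)

// newDeleteHandler returns a work function that takes the context at call
// time, so one handler can serve deletions with different lifetimes.
func newDeleteHandler(c kubernetes.Interface) func(ctx context.Context, ns, name string) error {
	return func(ctx context.Context, ns, name string) error {
		return c.CoreV1().Pods(ns).Delete(ctx, name, metav1.DeleteOptions{})
	}
}

func main() {
	client := fake.NewSimpleClientset(&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p", Namespace: "default"}})
	del := newDeleteHandler(client)
	fmt.Println(del(context.Background(), "default", "p")) // prints <nil> on success
}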
View File

@@ -217,11 +217,11 @@ func TestCreatePod(t *testing.T) {
}
for _, item := range testCases {
stopCh := make(chan struct{})
ctx, cancel := context.WithCancel(context.Background())
fakeClientset := fake.NewSimpleClientset()
controller := NewNoExecuteTaintManager(fakeClientset, (&podHolder{pod: item.pod}).getPod, getNodeFromClientset(fakeClientset), getPodsAssignedToNode(fakeClientset))
controller := NewNoExecuteTaintManager(ctx, fakeClientset, (&podHolder{pod: item.pod}).getPod, getNodeFromClientset(fakeClientset), getPodsAssignedToNode(fakeClientset))
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
go controller.Run(ctx)
controller.taintedNodes = item.taintedNodes
controller.PodUpdated(nil, item.pod)
// wait a bit
@@ -236,16 +236,16 @@ func TestCreatePod(t *testing.T) {
if podDeleted != item.expectDelete {
t.Errorf("%v: Unexpected test result. Expected delete %v, got %v", item.description, item.expectDelete, podDeleted)
}
close(stopCh)
cancel()
}
}
func TestDeletePod(t *testing.T) {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset()
controller := NewNoExecuteTaintManager(fakeClientset, getPodFromClientset(fakeClientset), getNodeFromClientset(fakeClientset), getPodsAssignedToNode(fakeClientset))
controller := NewNoExecuteTaintManager(context.TODO(), fakeClientset, getPodFromClientset(fakeClientset), getNodeFromClientset(fakeClientset), getPodsAssignedToNode(fakeClientset))
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
go controller.Run(context.TODO())
controller.taintedNodes = map[string][]v1.Taint{
"node1": {createNoExecuteTaint(1)},
}
@@ -304,12 +304,12 @@ func TestUpdatePod(t *testing.T) {
}
for _, item := range testCases {
stopCh := make(chan struct{})
ctx, cancel := context.WithCancel(context.Background())
fakeClientset := fake.NewSimpleClientset()
holder := &podHolder{}
controller := NewNoExecuteTaintManager(fakeClientset, holder.getPod, getNodeFromClientset(fakeClientset), getPodsAssignedToNode(fakeClientset))
controller := NewNoExecuteTaintManager(ctx, fakeClientset, holder.getPod, getNodeFromClientset(fakeClientset), getPodsAssignedToNode(fakeClientset))
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
go controller.Run(ctx)
controller.taintedNodes = item.taintedNodes
holder.setPod(item.prevPod)
@@ -333,7 +333,7 @@ func TestUpdatePod(t *testing.T) {
if podDeleted != item.expectDelete {
t.Errorf("%v: Unexpected test result. Expected delete %v, got %v", item.description, item.expectDelete, podDeleted)
}
close(stopCh)
cancel()
}
}
@@ -371,11 +371,11 @@ func TestCreateNode(t *testing.T) {
}
for _, item := range testCases {
stopCh := make(chan struct{})
ctx, cancel := context.WithCancel(context.Background())
fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods})
controller := NewNoExecuteTaintManager(fakeClientset, getPodFromClientset(fakeClientset), (&nodeHolder{node: item.node}).getNode, getPodsAssignedToNode(fakeClientset))
controller := NewNoExecuteTaintManager(ctx, fakeClientset, getPodFromClientset(fakeClientset), (&nodeHolder{node: item.node}).getNode, getPodsAssignedToNode(fakeClientset))
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
go controller.Run(ctx)
controller.NodeUpdated(nil, item.node)
// wait a bit
time.Sleep(timeForControllerToProgress)
@@ -389,19 +389,19 @@ func TestCreateNode(t *testing.T) {
if podDeleted != item.expectDelete {
t.Errorf("%v: Unexpected test result. Expected delete %v, got %v", item.description, item.expectDelete, podDeleted)
}
close(stopCh)
cancel()
}
}
func TestDeleteNode(t *testing.T) {
stopCh := make(chan struct{})
ctx, cancel := context.WithCancel(context.Background())
fakeClientset := fake.NewSimpleClientset()
controller := NewNoExecuteTaintManager(fakeClientset, getPodFromClientset(fakeClientset), getNodeFromClientset(fakeClientset), getPodsAssignedToNode(fakeClientset))
controller := NewNoExecuteTaintManager(ctx, fakeClientset, getPodFromClientset(fakeClientset), getNodeFromClientset(fakeClientset), getPodsAssignedToNode(fakeClientset))
controller.recorder = testutil.NewFakeRecorder()
controller.taintedNodes = map[string][]v1.Taint{
"node1": {createNoExecuteTaint(1)},
}
go controller.Run(stopCh)
go controller.Run(ctx)
controller.NodeUpdated(testutil.NewNode("node1"), nil)
// wait a bit to see if nothing will panic
time.Sleep(timeForControllerToProgress)
@@ -410,7 +410,7 @@ func TestDeleteNode(t *testing.T) {
t.Error("Node should have been deleted from taintedNodes list")
}
controller.taintedNodesLock.Unlock()
close(stopCh)
cancel()
}
func TestUpdateNode(t *testing.T) {
@@ -494,9 +494,9 @@ func TestUpdateNode(t *testing.T) {
for _, item := range testCases {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods})
controller := NewNoExecuteTaintManager(fakeClientset, getPodFromClientset(fakeClientset), (&nodeHolder{node: item.newNode}).getNode, getPodsAssignedToNode(fakeClientset))
controller := NewNoExecuteTaintManager(context.TODO(), fakeClientset, getPodFromClientset(fakeClientset), (&nodeHolder{node: item.newNode}).getNode, getPodsAssignedToNode(fakeClientset))
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
go controller.Run(context.TODO())
controller.NodeUpdated(item.oldNode, item.newNode)
// wait a bit
time.Sleep(timeForControllerToProgress)
@@ -537,16 +537,16 @@ func TestUpdateNodeWithMultipleTaints(t *testing.T) {
singleTaintedNode := testutil.NewNode("node1")
singleTaintedNode.Spec.Taints = []v1.Taint{taint1}
stopCh := make(chan struct{})
ctx, cancel := context.WithCancel(context.TODO())
fakeClientset := fake.NewSimpleClientset(pod)
holder := &nodeHolder{node: untaintedNode}
controller := NewNoExecuteTaintManager(fakeClientset, getPodFromClientset(fakeClientset), (holder).getNode, getPodsAssignedToNode(fakeClientset))
controller := NewNoExecuteTaintManager(context.TODO(), fakeClientset, getPodFromClientset(fakeClientset), (holder).getNode, getPodsAssignedToNode(fakeClientset))
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
go controller.Run(context.TODO())
// no taint
holder.setNode(untaintedNode)
controller.handleNodeUpdate(nodeUpdateItem{"node1"})
controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"})
// verify pod is not queued for deletion
if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil {
t.Fatalf("pod queued for deletion with no taints")
@@ -554,7 +554,7 @@ func TestUpdateNodeWithMultipleTaints(t *testing.T) {
// no taint -> infinitely tolerated taint
holder.setNode(singleTaintedNode)
controller.handleNodeUpdate(nodeUpdateItem{"node1"})
controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"})
// verify pod is not queued for deletion
if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil {
t.Fatalf("pod queued for deletion with permanently tolerated taint")
@@ -562,7 +562,7 @@ func TestUpdateNodeWithMultipleTaints(t *testing.T) {
// infinitely tolerated taint -> temporarily tolerated taint
holder.setNode(doubleTaintedNode)
controller.handleNodeUpdate(nodeUpdateItem{"node1"})
controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"})
// verify pod is queued for deletion
if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) == nil {
t.Fatalf("pod not queued for deletion after addition of temporarily tolerated taint")
@@ -570,7 +570,7 @@ func TestUpdateNodeWithMultipleTaints(t *testing.T) {
// temporarily tolerated taint -> infinitely tolerated taint
holder.setNode(singleTaintedNode)
controller.handleNodeUpdate(nodeUpdateItem{"node1"})
controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"})
// verify pod is not queued for deletion
if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil {
t.Fatalf("pod queued for deletion after removal of temporarily tolerated taint")
@@ -582,7 +582,7 @@ func TestUpdateNodeWithMultipleTaints(t *testing.T) {
t.Error("Unexpected deletion")
}
}
close(stopCh)
cancel()
}
func TestUpdateNodeWithMultiplePods(t *testing.T) {
@@ -628,9 +628,9 @@ func TestUpdateNodeWithMultiplePods(t *testing.T) {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods})
sort.Sort(item.expectedDeleteTimes)
controller := NewNoExecuteTaintManager(fakeClientset, getPodFromClientset(fakeClientset), (&nodeHolder{node: item.newNode}).getNode, getPodsAssignedToNode(fakeClientset))
controller := NewNoExecuteTaintManager(context.TODO(), fakeClientset, getPodFromClientset(fakeClientset), (&nodeHolder{node: item.newNode}).getNode, getPodsAssignedToNode(fakeClientset))
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
go controller.Run(context.TODO())
controller.NodeUpdated(item.oldNode, item.newNode)
startedAt := time.Now()
@@ -828,9 +828,9 @@ func TestEventualConsistency(t *testing.T) {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods})
holder := &podHolder{}
controller := NewNoExecuteTaintManager(fakeClientset, holder.getPod, (&nodeHolder{node: item.newNode}).getNode, getPodsAssignedToNode(fakeClientset))
controller := NewNoExecuteTaintManager(context.TODO(), fakeClientset, holder.getPod, (&nodeHolder{node: item.newNode}).getNode, getPodsAssignedToNode(fakeClientset))
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
go controller.Run(context.TODO())
if item.prevPod != nil {
holder.setPod(item.prevPod)

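The taint manager tests above replace the stopCh/close(stopCh) pair with context.WithCancel and cancel(). A stdlib-only sketch of that substitution; runLoop stands in for controller.Run.

package main

import (
	"context"
	"fmt"
	"time"
)

func runLoop(ctx context.Context) {
	<-ctx.Done() // previously <-stopCh
	fmt.Println("stopped:", ctx.Err())
}

func main() {
	// Before: stopCh := make(chan struct{}); go runLoop(stopCh); close(stopCh)
	ctx, cancel := context.WithCancel(context.Background())
	go runLoop(ctx)
	time.Sleep(10 * time.Millisecond) // let the goroutine start
	cancel()                          // replaces close(stopCh)
	time.Sleep(10 * time.Millisecond) // let it observe cancellation
}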
View File

@@ -17,6 +17,7 @@ limitations under the License.
package scheduler
import (
"context"
"sync"
"time"
@@ -49,13 +50,13 @@ type TimedWorker struct {
}
// createWorker creates a TimedWorker that will execute `f` not earlier than `fireAt`.
func createWorker(args *WorkArgs, createdAt time.Time, fireAt time.Time, f func(args *WorkArgs) error, clock clock.WithDelayedExecution) *TimedWorker {
func createWorker(ctx context.Context, args *WorkArgs, createdAt time.Time, fireAt time.Time, f func(ctx context.Context, args *WorkArgs) error, clock clock.WithDelayedExecution) *TimedWorker {
delay := fireAt.Sub(createdAt)
if delay <= 0 {
go f(args)
go f(ctx, args)
return nil
}
timer := clock.AfterFunc(delay, func() { f(args) })
timer := clock.AfterFunc(delay, func() { f(ctx, args) })
return &TimedWorker{
WorkItem: args,
CreatedAt: createdAt,
@@ -76,13 +77,13 @@ type TimedWorkerQueue struct {
sync.Mutex
// map of workers keyed by string returned by 'KeyFromWorkArgs' from the given worker.
workers map[string]*TimedWorker
workFunc func(args *WorkArgs) error
workFunc func(ctx context.Context, args *WorkArgs) error
clock clock.WithDelayedExecution
}
// CreateWorkerQueue creates a new TimedWorkerQueue for workers that will execute
// given function `f`.
func CreateWorkerQueue(f func(args *WorkArgs) error) *TimedWorkerQueue {
func CreateWorkerQueue(f func(ctx context.Context, args *WorkArgs) error) *TimedWorkerQueue {
return &TimedWorkerQueue{
workers: make(map[string]*TimedWorker),
workFunc: f,
@@ -90,9 +91,9 @@ func CreateWorkerQueue(f func(args *WorkArgs) error) *TimedWorkerQueue {
}
}
func (q *TimedWorkerQueue) getWrappedWorkerFunc(key string) func(args *WorkArgs) error {
return func(args *WorkArgs) error {
err := q.workFunc(args)
func (q *TimedWorkerQueue) getWrappedWorkerFunc(key string) func(ctx context.Context, args *WorkArgs) error {
return func(ctx context.Context, args *WorkArgs) error {
err := q.workFunc(ctx, args)
q.Lock()
defer q.Unlock()
if err == nil {
@@ -107,7 +108,7 @@ func (q *TimedWorkerQueue) getWrappedWorkerFunc(key string) func(args *WorkArgs)
}
// AddWork adds a work to the WorkerQueue which will be executed not earlier than `fireAt`.
func (q *TimedWorkerQueue) AddWork(args *WorkArgs, createdAt time.Time, fireAt time.Time) {
func (q *TimedWorkerQueue) AddWork(ctx context.Context, args *WorkArgs, createdAt time.Time, fireAt time.Time) {
key := args.KeyFromWorkArgs()
klog.V(4).Infof("Adding TimedWorkerQueue item %v at %v to be fired at %v", key, createdAt, fireAt)
@@ -117,7 +118,7 @@ func (q *TimedWorkerQueue) AddWork(args *WorkArgs, createdAt time.Time, fireAt t
klog.Warningf("Trying to add already existing work for %+v. Skipping.", args)
return
}
worker := createWorker(args, createdAt, fireAt, q.getWrappedWorkerFunc(key), q.clock)
worker := createWorker(ctx, args, createdAt, fireAt, q.getWrappedWorkerFunc(key), q.clock)
q.workers[key] = worker
}

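TimedWorkerQueue's work function now takes a context, and AddWork threads the caller's ctx into createWorker, which captures it in the delayed callback. A stdlib-only sketch of that capture pattern, using time.AfterFunc in place of the package's clock.WithDelayedExecution abstraction; schedule and workArgs are illustrative names.

package main

import (
	"context"
	"fmt"
	"time"
)

type workArgs struct{ key string }

// schedule mirrors createWorker: if the work is already due it runs at once,
// otherwise the ctx and args are closed over by the timer callback.
func schedule(ctx context.Context, args workArgs, delay time.Duration, f func(context.Context, workArgs) error) *time.Timer {
	if delay <= 0 {
		go f(ctx, args)
		return nil
	}
	return time.AfterFunc(delay, func() { f(ctx, args) })
}

func main() {
	ctx := context.Background()
	schedule(ctx, workArgs{key: "default/pod-a"}, 50*time.Millisecond, func(ctx context.Context, a workArgs) error {
		fmt.Println("fired", a.key, ctx.Err())
		return nil
	})
	time.Sleep(100 * time.Millisecond)
}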
View File

@@ -17,6 +17,7 @@ limitations under the License.
package scheduler
import (
"context"
"sync"
"sync/atomic"
"testing"
@@ -29,23 +30,23 @@ func TestExecute(t *testing.T) {
testVal := int32(0)
wg := sync.WaitGroup{}
wg.Add(5)
queue := CreateWorkerQueue(func(args *WorkArgs) error {
queue := CreateWorkerQueue(func(ctx context.Context, args *WorkArgs) error {
atomic.AddInt32(&testVal, 1)
wg.Done()
return nil
})
now := time.Now()
queue.AddWork(NewWorkArgs("1", "1"), now, now)
queue.AddWork(NewWorkArgs("2", "2"), now, now)
queue.AddWork(NewWorkArgs("3", "3"), now, now)
queue.AddWork(NewWorkArgs("4", "4"), now, now)
queue.AddWork(NewWorkArgs("5", "5"), now, now)
queue.AddWork(context.TODO(), NewWorkArgs("1", "1"), now, now)
queue.AddWork(context.TODO(), NewWorkArgs("2", "2"), now, now)
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, now)
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, now)
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, now)
// Adding the same thing second time should be no-op
queue.AddWork(NewWorkArgs("1", "1"), now, now)
queue.AddWork(NewWorkArgs("2", "2"), now, now)
queue.AddWork(NewWorkArgs("3", "3"), now, now)
queue.AddWork(NewWorkArgs("4", "4"), now, now)
queue.AddWork(NewWorkArgs("5", "5"), now, now)
queue.AddWork(context.TODO(), NewWorkArgs("1", "1"), now, now)
queue.AddWork(context.TODO(), NewWorkArgs("2", "2"), now, now)
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, now)
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, now)
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, now)
wg.Wait()
lastVal := atomic.LoadInt32(&testVal)
if lastVal != 5 {
@@ -57,7 +58,7 @@ func TestExecuteDelayed(t *testing.T) {
testVal := int32(0)
wg := sync.WaitGroup{}
wg.Add(5)
queue := CreateWorkerQueue(func(args *WorkArgs) error {
queue := CreateWorkerQueue(func(ctx context.Context, args *WorkArgs) error {
atomic.AddInt32(&testVal, 1)
wg.Done()
return nil
@@ -66,16 +67,16 @@ func TestExecuteDelayed(t *testing.T) {
then := now.Add(10 * time.Second)
fakeClock := testingclock.NewFakeClock(now)
queue.clock = fakeClock
queue.AddWork(NewWorkArgs("1", "1"), now, then)
queue.AddWork(NewWorkArgs("2", "2"), now, then)
queue.AddWork(NewWorkArgs("3", "3"), now, then)
queue.AddWork(NewWorkArgs("4", "4"), now, then)
queue.AddWork(NewWorkArgs("5", "5"), now, then)
queue.AddWork(NewWorkArgs("1", "1"), now, then)
queue.AddWork(NewWorkArgs("2", "2"), now, then)
queue.AddWork(NewWorkArgs("3", "3"), now, then)
queue.AddWork(NewWorkArgs("4", "4"), now, then)
queue.AddWork(NewWorkArgs("5", "5"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("1", "1"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("2", "2"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("1", "1"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("2", "2"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then)
fakeClock.Step(11 * time.Second)
wg.Wait()
lastVal := atomic.LoadInt32(&testVal)
@@ -88,7 +89,7 @@ func TestCancel(t *testing.T) {
testVal := int32(0)
wg := sync.WaitGroup{}
wg.Add(3)
queue := CreateWorkerQueue(func(args *WorkArgs) error {
queue := CreateWorkerQueue(func(ctx context.Context, args *WorkArgs) error {
atomic.AddInt32(&testVal, 1)
wg.Done()
return nil
@@ -97,16 +98,16 @@ func TestCancel(t *testing.T) {
then := now.Add(10 * time.Second)
fakeClock := testingclock.NewFakeClock(now)
queue.clock = fakeClock
queue.AddWork(NewWorkArgs("1", "1"), now, then)
queue.AddWork(NewWorkArgs("2", "2"), now, then)
queue.AddWork(NewWorkArgs("3", "3"), now, then)
queue.AddWork(NewWorkArgs("4", "4"), now, then)
queue.AddWork(NewWorkArgs("5", "5"), now, then)
queue.AddWork(NewWorkArgs("1", "1"), now, then)
queue.AddWork(NewWorkArgs("2", "2"), now, then)
queue.AddWork(NewWorkArgs("3", "3"), now, then)
queue.AddWork(NewWorkArgs("4", "4"), now, then)
queue.AddWork(NewWorkArgs("5", "5"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("1", "1"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("2", "2"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("1", "1"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("2", "2"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then)
queue.CancelWork(NewWorkArgs("2", "2").KeyFromWorkArgs())
queue.CancelWork(NewWorkArgs("4", "4").KeyFromWorkArgs())
fakeClock.Step(11 * time.Second)
@@ -121,7 +122,7 @@ func TestCancelAndReadd(t *testing.T) {
testVal := int32(0)
wg := sync.WaitGroup{}
wg.Add(4)
queue := CreateWorkerQueue(func(args *WorkArgs) error {
queue := CreateWorkerQueue(func(ctx context.Context, args *WorkArgs) error {
atomic.AddInt32(&testVal, 1)
wg.Done()
return nil
@@ -130,19 +131,19 @@ func TestCancelAndReadd(t *testing.T) {
then := now.Add(10 * time.Second)
fakeClock := testingclock.NewFakeClock(now)
queue.clock = fakeClock
queue.AddWork(NewWorkArgs("1", "1"), now, then)
queue.AddWork(NewWorkArgs("2", "2"), now, then)
queue.AddWork(NewWorkArgs("3", "3"), now, then)
queue.AddWork(NewWorkArgs("4", "4"), now, then)
queue.AddWork(NewWorkArgs("5", "5"), now, then)
queue.AddWork(NewWorkArgs("1", "1"), now, then)
queue.AddWork(NewWorkArgs("2", "2"), now, then)
queue.AddWork(NewWorkArgs("3", "3"), now, then)
queue.AddWork(NewWorkArgs("4", "4"), now, then)
queue.AddWork(NewWorkArgs("5", "5"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("1", "1"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("2", "2"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("1", "1"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("2", "2"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then)
queue.CancelWork(NewWorkArgs("2", "2").KeyFromWorkArgs())
queue.CancelWork(NewWorkArgs("4", "4").KeyFromWorkArgs())
queue.AddWork(NewWorkArgs("2", "2"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("2", "2"), now, then)
fakeClock.Step(11 * time.Second)
wg.Wait()
lastVal := atomic.LoadInt32(&testVal)