Taint node in parallel.
Signed-off-by: Klaus Ma <klaus1982.cn@gmail.com>
@@ -24,6 +24,8 @@ package nodelifecycle
 import (
 	"context"
 	"fmt"
+	"hash/fnv"
+	"io"
 	"sync"
 	"time"
 
@@ -46,6 +48,7 @@ import (
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/client-go/util/flowcontrol"
+	"k8s.io/client-go/util/workqueue"
 	v1node "k8s.io/kubernetes/pkg/api/v1/node"
 	"k8s.io/kubernetes/pkg/cloudprovider"
 	"k8s.io/kubernetes/pkg/controller"
@@ -218,6 +221,9 @@ type Controller struct {
 	// if set to true, NodeController will taint Nodes based on its condition for 'NetworkUnavailable',
 	// 'MemoryPressure', 'OutOfDisk' and 'DiskPressure'.
 	taintNodeByCondition bool
+
+	nodeUpdateChannels []chan *v1.Node
+	nodeUpdateQueue    workqueue.Interface
 }
 
 // NewNodeLifecycleController returns a new taint controller.
@@ -276,6 +282,7 @@ func NewNodeLifecycleController(podInformer coreinformers.PodInformer,
 		runTaintManager:        runTaintManager,
 		useTaintBasedEvictions: useTaintBasedEvictions && runTaintManager,
 		taintNodeByCondition:   taintNodeByCondition,
+		nodeUpdateQueue:        workqueue.New(),
 	}
 	if useTaintBasedEvictions {
 		glog.Infof("Controller is using taint based evictions.")
@@ -343,10 +350,12 @@ func NewNodeLifecycleController(podInformer coreinformers.PodInformer,
 		glog.Infof("Controller will taint node by condition.")
 		nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
 			AddFunc: nodeutil.CreateAddNodeHandler(func(node *v1.Node) error {
-				return nc.doNoScheduleTaintingPass(node)
+				nc.nodeUpdateQueue.Add(node)
+				return nil
 			}),
 			UpdateFunc: nodeutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
-				return nc.doNoScheduleTaintingPass(newNode)
+				nc.nodeUpdateQueue.Add(newNode)
+				return nil
 			}),
 		})
 	}
@@ -383,18 +392,52 @@ func (nc *Controller) Run(stopCh <-chan struct{}) {
 	}
 
 	if nc.runTaintManager {
-		go nc.taintManager.Run(wait.NeverStop)
+		go nc.taintManager.Run(stopCh)
 	}
 
+	if nc.taintNodeByCondition {
+		for i := 0; i < scheduler.UpdateWorkerSize; i++ {
+			nc.nodeUpdateChannels = append(nc.nodeUpdateChannels, make(chan *v1.Node, scheduler.NodeUpdateChannelSize))
+		}
+
+		// Dispatcher
+		go func(stopCh <-chan struct{}) {
+			for {
+				obj, shutdown := nc.nodeUpdateQueue.Get()
+				if shutdown {
+					break
+				}
+
+				node := obj.(*v1.Node)
+				hash := hash(node.Name, scheduler.UpdateWorkerSize)
+
+				select {
+				case <-stopCh:
+					nc.nodeUpdateQueue.Done(node)
+					return
+				case nc.nodeUpdateChannels[hash] <- node:
+				}
+				nc.nodeUpdateQueue.Done(node)
+			}
+		}(stopCh)
+		// Close node update queue to cleanup go routine.
+		defer nc.nodeUpdateQueue.ShutDown()
+
+		// Start workers to update NoSchedule taint for nodes.
+		for i := 0; i < scheduler.UpdateWorkerSize; i++ {
+			go nc.doNoScheduleTaintingPassWorker(i, stopCh)
+		}
+	}
+
 	if nc.useTaintBasedEvictions {
 		// Handling taint based evictions. Because we don't want a dedicated logic in TaintManager for NC-originated
 		// taints and we normally don't rate limit evictions caused by taints, we need to rate limit adding taints.
-		go wait.Until(nc.doNoExecuteTaintingPass, scheduler.NodeEvictionPeriod, wait.NeverStop)
+		go wait.Until(nc.doNoExecuteTaintingPass, scheduler.NodeEvictionPeriod, stopCh)
 	} else {
 		// Managing eviction of nodes:
 		// When we delete pods off a node, if the node was not empty at the time we then
 		// queue an eviction watcher. If we hit an error, retry deletion.
-		go wait.Until(nc.doEvictionPass, scheduler.NodeEvictionPeriod, wait.NeverStop)
+		go wait.Until(nc.doEvictionPass, scheduler.NodeEvictionPeriod, stopCh)
 	}
 
 	// Incorporate the results of node status pushed from kubelet to master.
@@ -402,7 +445,7 @@ func (nc *Controller) Run(stopCh <-chan struct{}) {
 		if err := nc.monitorNodeStatus(); err != nil {
 			glog.Errorf("Error monitoring node status: %v", err)
 		}
-	}, nc.nodeMonitorPeriod, wait.NeverStop)
+	}, nc.nodeMonitorPeriod, stopCh)
 
 	<-stopCh
 }
@@ -445,6 +488,19 @@ func (nc *Controller) doFixDeprecatedTaintKeyPass(node *v1.Node) error {
 	return nil
 }
 
+func (nc *Controller) doNoScheduleTaintingPassWorker(i int, stopCh <-chan struct{}) {
+	for {
+		select {
+		case <-stopCh:
+			return
+		case node := <-nc.nodeUpdateChannels[i]:
+			if err := nc.doNoScheduleTaintingPass(node); err != nil {
+				glog.Errorf("Failed to taint NoSchedule on node <%s>: %v", node.Name, err)
+			}
+		}
+	}
+}
+
 func (nc *Controller) doNoScheduleTaintingPass(node *v1.Node) error {
 	// Map node's condition to Taints.
 	var taints []v1.Taint
@@ -1197,3 +1253,9 @@ func (nc *Controller) ComputeZoneState(nodeReadyConditions []*v1.NodeCondition)
 		return notReadyNodes, stateNormal
 	}
 }
+
+func hash(val string, max int) int {
+	hasher := fnv.New32a()
+	io.WriteString(hasher, val)
+	return int(hasher.Sum32()) % max
+}
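
The sketch below is not part of the commit; it is a minimal, standalone distillation of the pattern the diff introduces: a single dispatcher drains a queue and routes each node name, via the FNV-based hash helper, to one of a fixed set of worker channels, so updates for the same node always reach the same worker while different nodes are tainted in parallel. A plain buffered channel stands in for workqueue.Interface, and the illustrative updateWorkerSize constant takes the role of scheduler.UpdateWorkerSize.

package main

import (
	"fmt"
	"hash/fnv"
	"io"
	"sync"
)

const updateWorkerSize = 8 // illustrative stand-in for scheduler.UpdateWorkerSize

// hash mirrors the helper added by the commit: bucket a string into [0, max).
func hash(val string, max int) int {
	hasher := fnv.New32a()
	io.WriteString(hasher, val)
	return int(hasher.Sum32()) % max
}

func main() {
	queue := make(chan string, 64) // stand-in for nc.nodeUpdateQueue
	channels := make([]chan string, updateWorkerSize)
	for i := range channels {
		channels[i] = make(chan string, 10)
	}

	var wg sync.WaitGroup

	// Workers: each drains only its own channel, like doNoScheduleTaintingPassWorker.
	for i := 0; i < updateWorkerSize; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			for node := range channels[i] {
				fmt.Printf("worker %d taints node %s\n", i, node)
			}
		}(i)
	}

	// Dispatcher: route each queued node to the worker that owns its hash bucket.
	go func() {
		for node := range queue {
			channels[hash(node, updateWorkerSize)] <- node
		}
		for _, ch := range channels {
			close(ch)
		}
	}()

	// Producers (the informer handlers in the real controller) only enqueue.
	for _, n := range []string{"node-a", "node-b", "node-c", "node-a"} {
		queue <- n
	}
	close(queue)
	wg.Wait()
}

Hashing on the node name is what keeps the parallelism safe: two updates for the same node can never be processed concurrently by different workers, so per-node ordering is preserved while unrelated nodes proceed in parallel.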