Mirror of https://github.com/k3s-io/kubernetes.git

Migrate CIDR allocators to shared node informer

```diff
@@ -25,16 +25,16 @@ import (
 	"k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/wait"
 	informers "k8s.io/client-go/informers/core/v1"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/kubernetes/scheme"
 	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
 	corelisters "k8s.io/client-go/listers/core/v1"
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/client-go/tools/record"

 	"k8s.io/kubernetes/pkg/controller"
 	"k8s.io/kubernetes/pkg/controller/node/ipam/cidrset"
 	"k8s.io/kubernetes/pkg/controller/node/util"
 )
@@ -45,6 +45,12 @@ type rangeAllocator struct {
 	clusterCIDR *net.IPNet
 	maxCIDRs    int
+
+	// nodeLister is able to list/get nodes and is populated by the shared informer passed to
+	// NewCloudCIDRAllocator.
+	nodeLister corelisters.NodeLister
+	// nodesSynced returns true if the node shared informer has been synced at least once.
+	nodesSynced cache.InformerSynced

 	// Channel that is used to pass updating Nodes with assigned CIDRs to the background
 	// This increases a throughput of CIDR assignment by not blocking on long operations.
 	nodeCIDRUpdateChannel chan nodeAndCIDR
@@ -59,7 +65,7 @@ type rangeAllocator struct {
 // Caller must ensure subNetMaskSize is not less than cluster CIDR mask size.
 // Caller must always pass in a list of existing nodes so the new allocator
 // can initialize its CIDR map. NodeList is only nil in testing.
-func NewCIDRRangeAllocator(client clientset.Interface, clusterCIDR *net.IPNet, serviceCIDR *net.IPNet, subNetMaskSize int, nodeList *v1.NodeList) (CIDRAllocator, error) {
+func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.NodeInformer, clusterCIDR *net.IPNet, serviceCIDR *net.IPNet, subNetMaskSize int, nodeList *v1.NodeList) (CIDRAllocator, error) {
 	if client == nil {
 		glog.Fatalf("kubeClient is nil when starting NodeController")
 	}
@@ -78,6 +84,8 @@ func NewCIDRRangeAllocator(client clientset.Interface, clusterCIDR *net.IPNet, s
 		client:      client,
 		cidrs:       set,
 		clusterCIDR: clusterCIDR,
+		nodeLister:  nodeInformer.Lister(),
+		nodesSynced: nodeInformer.Informer().HasSynced,
 		nodeCIDRUpdateChannel: make(chan nodeAndCIDR, cidrUpdateQueueSize),
 		recorder:          recorder,
 		nodesInProcessing: sets.NewString(),
@@ -107,14 +115,57 @@ func NewCIDRRangeAllocator(client clientset.Interface, clusterCIDR *net.IPNet, s
 			}
 		}
 	}
-	for i := 0; i < cidrUpdateWorkers; i++ {
-		// TODO: Take stopChan as an argument to NewCIDRRangeAllocator and pass it to the worker.
-		go ra.worker(wait.NeverStop)
-	}
+
+	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
+		AddFunc: util.CreateAddNodeHandler(ra.AllocateOrOccupyCIDR),
+		UpdateFunc: util.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
+			// If the PodCIDR is not empty we either:
+			// - already processed a Node that already had a CIDR after NC restarted
+			//   (cidr is marked as used),
+			// - already processed a Node successfully and allocated a CIDR for it
+			//   (cidr is marked as used),
+			// - already processed a Node but we did saw a "timeout" response and
+			//   request eventually got through in this case we haven't released
+			//   the allocated CIDR (cidr is still marked as used).
+			// There's a possible error here:
+			// - NC sees a new Node and assigns a CIDR X to it,
+			// - Update Node call fails with a timeout,
+			// - Node is updated by some other component, NC sees an update and
+			//   assigns CIDR Y to the Node,
+			// - Both CIDR X and CIDR Y are marked as used in the local cache,
+			//   even though Node sees only CIDR Y
+			// The problem here is that in in-memory cache we see CIDR X as marked,
+			// which prevents it from being assigned to any new node. The cluster
+			// state is correct.
+			// Restart of NC fixes the issue.
+			if newNode.Spec.PodCIDR == "" {
+				return ra.AllocateOrOccupyCIDR(newNode)
+			}
+			return nil
+		}),
+		DeleteFunc: util.CreateDeleteNodeHandler(ra.ReleaseCIDR),
+	})

 	return ra, nil
 }

+func (r *rangeAllocator) Run(stopCh <-chan struct{}) {
+	defer utilruntime.HandleCrash()
+
+	glog.Infof("Starting range CIDR allocator")
+	defer glog.Infof("Shutting down range CIDR allocator")
+
+	if !controller.WaitForCacheSync("cidrallocator", stopCh, r.nodesSynced) {
+		return
+	}
+
+	for i := 0; i < cidrUpdateWorkers; i++ {
+		go r.worker(stopCh)
+	}
+
+	<-stopCh
+}
+
 func (r *rangeAllocator) worker(stopChan <-chan struct{}) {
 	for {
 		select {
@@ -232,7 +283,7 @@ func (r *rangeAllocator) updateCIDRAllocation(data nodeAndCIDR) error {
 	podCIDR := data.cidr.String()
 	for rep := 0; rep < cidrUpdateRetries; rep++ {
 		// TODO: change it to using PATCH instead of full Node updates.
-		node, err = r.client.CoreV1().Nodes().Get(data.nodeName, metav1.GetOptions{})
+		node, err = r.nodeLister.Get(data.nodeName)
 		if err != nil {
 			glog.Errorf("Failed while getting node %v to retry updating Node.Spec.PodCIDR: %v", data.nodeName, err)
 			continue
@@ -269,35 +320,3 @@ func (r *rangeAllocator) updateCIDRAllocation(data nodeAndCIDR) error {
 	}
 	return err
 }
-
-func (r *rangeAllocator) Register(nodeInformer informers.NodeInformer) {
-	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
-		AddFunc: util.CreateAddNodeHandler(r.AllocateOrOccupyCIDR),
-		UpdateFunc: util.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
-			// If the PodCIDR is not empty we either:
-			// - already processed a Node that already had a CIDR after NC restarted
-			//   (cidr is marked as used),
-			// - already processed a Node successfully and allocated a CIDR for it
-			//   (cidr is marked as used),
-			// - already processed a Node but we did saw a "timeout" response and
-			//   request eventually got through in this case we haven't released
-			//   the allocated CIDR (cidr is still marked as used).
-			// There's a possible error here:
-			// - NC sees a new Node and assigns a CIDR X to it,
-			// - Update Node call fails with a timeout,
-			// - Node is updated by some other component, NC sees an update and
-			//   assigns CIDR Y to the Node,
-			// - Both CIDR X and CIDR Y are marked as used in the local cache,
-			//   even though Node sees only CIDR Y
-			// The problem here is that in in-memory cache we see CIDR X as marked,
-			// which prevents it from being assigned to any new node. The cluster
-			// state is correct.
-			// Restart of NC fixes the issue.
-			if newNode.Spec.PodCIDR == "" {
-				return r.AllocateOrOccupyCIDR(newNode)
-			}
-			return nil
-		}),
-		DeleteFunc: util.CreateDeleteNodeHandler(r.ReleaseCIDR),
-	})
-}
```
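
For reference, the sketch below shows how a caller might wire the new constructor signature to a shared node informer: a single SharedInformerFactory supplies the node informer, the factory is started, and the allocator is driven by a stop channel. It is illustrative only and not part of this commit: the kubeconfig path, resync period, CIDR values, and the `runnable` assertion (the diff adds Run to the range allocator, but the CIDRAllocator interface is not shown here) are assumptions, and the ipam import path resolves only inside the Kubernetes source tree.

```go
// Illustrative wiring only (not from this commit). Assumes an in-tree build so
// the ipam package is importable; kubeconfig path and CIDRs are placeholders.
package main

import (
	"net"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"

	"k8s.io/kubernetes/pkg/controller/node/ipam"
)

// runnable captures the Run method added in this commit; whether the
// CIDRAllocator interface itself declares it is not shown in the diff.
type runnable interface {
	Run(stopCh <-chan struct{})
}

func main() {
	// Build a client from a local kubeconfig (placeholder path).
	config, err := clientcmd.BuildConfigFromFlags("", "/path/to/kubeconfig")
	if err != nil {
		panic(err)
	}
	client := kubernetes.NewForConfigOrDie(config)

	// One shared informer factory; its node informer replaces the allocator's
	// own list/watch of nodes.
	factory := informers.NewSharedInformerFactory(client, 30*time.Second)
	nodeInformer := factory.Core().V1().Nodes()

	_, clusterCIDR, _ := net.ParseCIDR("10.244.0.0/16")
	_, serviceCIDR, _ := net.ParseCIDR("10.96.0.0/12")

	// New signature from the diff: the node informer is injected by the caller.
	// nil for the node list is only acceptable in tests per the doc comment;
	// a real caller passes the existing nodes.
	allocator, err := ipam.NewCIDRRangeAllocator(client, nodeInformer, clusterCIDR, serviceCIDR, 24, nil)
	if err != nil {
		panic(err)
	}

	stopCh := wait.NeverStop

	// Start the shared informers, then the allocator; Run blocks on
	// WaitForCacheSync before launching its workers.
	factory.Start(stopCh)
	if r, ok := allocator.(runnable); ok {
		go r.Run(stopCh)
	}

	<-stopCh
}
```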
||||