Migrate CIDR allocators to shared node informer

Shyam Jeedigunta
2017-11-22 19:38:26 +01:00
parent f85649c6cd
commit 263dd1227d
6 changed files with 169 additions and 89 deletions
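For callers, the effect of this change is that the range allocator is constructed with a shared node informer and started explicitly via Run. Below is a minimal wiring sketch, not code from this commit: the in-cluster config, the 30-second resync period, the example CIDRs, and the ipam import path are assumptions for illustration.

package main

import (
	"net"
	"time"

	informers "k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/kubernetes/pkg/controller/node/ipam" // assumed import path for the allocator package
)

func main() {
	// Build a client; in-cluster config is just one way to obtain a rest.Config.
	cfg, err := rest.InClusterConfig()
	if err != nil {
		panic(err)
	}
	kubeClient := kubernetes.NewForConfigOrDie(cfg)

	// Shared informer factory; its node informer is what NewCIDRRangeAllocator
	// now consumes instead of registering handlers later via Register.
	factory := informers.NewSharedInformerFactory(kubeClient, 30*time.Second)
	nodeInformer := factory.Core().V1().Nodes()

	_, clusterCIDR, _ := net.ParseCIDR("10.244.0.0/16") // example values
	_, serviceCIDR, _ := net.ParseCIDR("10.96.0.0/12")

	// Signature from this commit: client, node informer, cluster CIDR,
	// service CIDR, per-node mask size, and an optional initial NodeList.
	allocator, err := ipam.NewCIDRRangeAllocator(kubeClient, nodeInformer, clusterCIDR, serviceCIDR, 24, nil)
	if err != nil {
		panic(err)
	}

	stopCh := make(chan struct{})
	defer close(stopCh)

	// Start the shared informers, then the allocator; Run waits for the node
	// cache to sync and then launches the CIDR update workers. This assumes
	// the CIDRAllocator interface exposes Run, as rangeAllocator does below.
	factory.Start(stopCh)
	go allocator.Run(stopCh)

	select {} // block forever in this sketch
}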


@@ -25,16 +25,16 @@ import (
"k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
informers "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/node/ipam/cidrset"
"k8s.io/kubernetes/pkg/controller/node/util"
)
@@ -45,6 +45,12 @@ type rangeAllocator struct {
clusterCIDR *net.IPNet
maxCIDRs int
// nodeLister is able to list/get nodes and is populated by the shared informer passed to
// NewCIDRRangeAllocator.
nodeLister corelisters.NodeLister
// nodesSynced returns true if the node shared informer has been synced at least once.
nodesSynced cache.InformerSynced
// Channel that is used to pass updated Nodes with assigned CIDRs to the background workers.
// This increases the throughput of CIDR assignment by not blocking on long operations.
nodeCIDRUpdateChannel chan nodeAndCIDR
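The comment above describes the throughput trick: event handlers only enqueue work on nodeCIDRUpdateChannel, and a fixed pool of background workers drains it, so slow Node updates never block the informer callbacks. A self-contained sketch of that producer/worker pattern follows; the types, sizes, and names are illustrative, not the controller's actual code.

package main

import (
	"fmt"
	"time"
)

// workItem stands in for the controller's nodeAndCIDR pair (hypothetical type).
type workItem struct {
	nodeName string
	cidr     string
}

// worker drains the queue until it is closed or the stop channel fires.
func worker(id int, queue <-chan workItem, stopCh <-chan struct{}) {
	for {
		select {
		case item, ok := <-queue:
			if !ok {
				return
			}
			// The long-running update happens here, off the event-handler path.
			fmt.Printf("worker %d: assigning %s to %s\n", id, item.cidr, item.nodeName)
		case <-stopCh:
			return
		}
	}
}

func main() {
	queue := make(chan workItem, 16) // buffered, in the spirit of cidrUpdateQueueSize
	stopCh := make(chan struct{})

	for i := 0; i < 4; i++ { // in the spirit of cidrUpdateWorkers
		go worker(i, queue, stopCh)
	}

	// An event handler would enqueue here instead of updating the Node inline.
	queue <- workItem{nodeName: "node-a", cidr: "10.244.1.0/24"}

	time.Sleep(100 * time.Millisecond) // let a worker drain the item in this demo
	close(stopCh)
}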
@@ -59,7 +65,7 @@ type rangeAllocator struct {
// Caller must ensure subNetMaskSize is not less than cluster CIDR mask size.
// Caller must always pass in a list of existing nodes so the new allocator
// can initialize its CIDR map. NodeList is only nil in testing.
func NewCIDRRangeAllocator(client clientset.Interface, clusterCIDR *net.IPNet, serviceCIDR *net.IPNet, subNetMaskSize int, nodeList *v1.NodeList) (CIDRAllocator, error) {
func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.NodeInformer, clusterCIDR *net.IPNet, serviceCIDR *net.IPNet, subNetMaskSize int, nodeList *v1.NodeList) (CIDRAllocator, error) {
if client == nil {
glog.Fatalf("kubeClient is nil when starting NodeController")
}
@@ -78,6 +84,8 @@ func NewCIDRRangeAllocator(client clientset.Interface, clusterCIDR *net.IPNet, s
client: client,
cidrs: set,
clusterCIDR: clusterCIDR,
nodeLister: nodeInformer.Lister(),
nodesSynced: nodeInformer.Informer().HasSynced,
nodeCIDRUpdateChannel: make(chan nodeAndCIDR, cidrUpdateQueueSize),
recorder: recorder,
nodesInProcessing: sets.NewString(),
@@ -107,14 +115,57 @@ func NewCIDRRangeAllocator(client clientset.Interface, clusterCIDR *net.IPNet, s
}
}
}
for i := 0; i < cidrUpdateWorkers; i++ {
// TODO: Take stopChan as an argument to NewCIDRRangeAllocator and pass it to the worker.
go ra.worker(wait.NeverStop)
}
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: util.CreateAddNodeHandler(ra.AllocateOrOccupyCIDR),
UpdateFunc: util.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
// If the PodCIDR is not empty we either:
// - already processed a Node that already had a CIDR after NC restarted
// (cidr is marked as used),
// - already processed a Node successfully and allocated a CIDR for it
// (cidr is marked as used),
// - already processed a Node but saw a "timeout" response even though the
// request eventually got through; in this case we haven't released
// the allocated CIDR (cidr is still marked as used).
// There's a possible error here:
// - NC sees a new Node and assigns CIDR X to it,
// - the Update Node call fails with a timeout,
// - the Node is updated by some other component, NC sees the update and
// assigns CIDR Y to the Node,
// - both CIDR X and CIDR Y are marked as used in the local cache,
// even though the Node ends up with only CIDR Y.
// The problem here is that CIDR X is marked as used in the in-memory cache,
// which prevents it from being assigned to any new node, even though the
// cluster state is correct.
// Restarting NC fixes the issue.
if newNode.Spec.PodCIDR == "" {
return ra.AllocateOrOccupyCIDR(newNode)
}
return nil
}),
DeleteFunc: util.CreateDeleteNodeHandler(ra.ReleaseCIDR),
})
return ra, nil
}
func (r *rangeAllocator) Run(stopCh <-chan struct{}) {
defer utilruntime.HandleCrash()
glog.Infof("Starting range CIDR allocator")
defer glog.Infof("Shutting down range CIDR allocator")
if !controller.WaitForCacheSync("cidrallocator", stopCh, r.nodesSynced) {
return
}
for i := 0; i < cidrUpdateWorkers; i++ {
go r.worker(stopCh)
}
<-stopCh
}
func (r *rangeAllocator) worker(stopChan <-chan struct{}) {
for {
select {
@@ -232,7 +283,7 @@ func (r *rangeAllocator) updateCIDRAllocation(data nodeAndCIDR) error {
podCIDR := data.cidr.String()
for rep := 0; rep < cidrUpdateRetries; rep++ {
// TODO: change it to using PATCH instead of full Node updates.
node, err = r.client.CoreV1().Nodes().Get(data.nodeName, metav1.GetOptions{})
node, err = r.nodeLister.Get(data.nodeName)
if err != nil {
glog.Errorf("Failed while getting node %v to retry updating Node.Spec.PodCIDR: %v", data.nodeName, err)
continue
@@ -269,35 +320,3 @@ func (r *rangeAllocator) updateCIDRAllocation(data nodeAndCIDR) error {
}
return err
}
func (r *rangeAllocator) Register(nodeInformer informers.NodeInformer) {
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: util.CreateAddNodeHandler(r.AllocateOrOccupyCIDR),
UpdateFunc: util.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
// If the PodCIDR is not empty we either:
// - already processed a Node that already had a CIDR after NC restarted
// (cidr is marked as used),
// - already processed a Node successfully and allocated a CIDR for it
// (cidr is marked as used),
// - already processed a Node but saw a "timeout" response even though the
// request eventually got through; in this case we haven't released
// the allocated CIDR (cidr is still marked as used).
// There's a possible error here:
// - NC sees a new Node and assigns CIDR X to it,
// - the Update Node call fails with a timeout,
// - the Node is updated by some other component, NC sees the update and
// assigns CIDR Y to the Node,
// - both CIDR X and CIDR Y are marked as used in the local cache,
// even though the Node ends up with only CIDR Y.
// The problem here is that CIDR X is marked as used in the in-memory cache,
// which prevents it from being assigned to any new node, even though the
// cluster state is correct.
// Restarting NC fixes the issue.
if newNode.Spec.PodCIDR == "" {
return r.AllocateOrOccupyCIDR(newNode)
}
return nil
}),
DeleteFunc: util.CreateDeleteNodeHandler(r.ReleaseCIDR),
})
}
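One more note on the updateCIDRAllocation hunk above: the retry loop now reads the Node from the informer's lister instead of issuing a GET per attempt, while the write still goes through the client. A minimal sketch of that read-from-cache, write-through-API pattern follows; the function name, retry count, and error handling are assumptions for illustration.

package main

import (
	"fmt"

	clientset "k8s.io/client-go/kubernetes"
	corelisters "k8s.io/client-go/listers/core/v1"
)

const retries = 3 // illustrative, in the spirit of cidrUpdateRetries

func updateNodePodCIDR(client clientset.Interface, nodeLister corelisters.NodeLister, nodeName, podCIDR string) error {
	var lastErr error
	for i := 0; i < retries; i++ {
		// Read from the shared informer cache instead of hitting the API server.
		node, err := nodeLister.Get(nodeName)
		if err != nil {
			lastErr = err
			continue
		}
		if node.Spec.PodCIDR == podCIDR {
			return nil // already up to date
		}
		// Never mutate objects returned by a lister; they are shared cache state.
		updated := node.DeepCopy()
		updated.Spec.PodCIDR = podCIDR
		// The write still goes through the API server (a full update here; the
		// TODO in the diff suggests PATCH as a future improvement).
		if _, err := client.CoreV1().Nodes().Update(updated); err != nil {
			lastErr = err
			continue
		}
		return nil
	}
	return fmt.Errorf("failed to update PodCIDR of node %q after %d attempts: %v", nodeName, retries, lastErr)
}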