Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-25 04:33:26 +00:00)
Merge pull request #100110 from CecileRobertMichon/azure-vm-cache
Cherry pick #537 from cloud provider azure: Refresh VM cache when node is not found
This commit is contained in: commit 830055379b
@@ -274,6 +274,8 @@ type Cloud struct {
     ipv6DualStackEnabled bool
     // Lock for access to node caches, includes nodeZones, nodeResourceGroups, and unmanagedNodes.
     nodeCachesLock sync.RWMutex
+    // nodeNames holds current nodes for tracking added nodes in VM caches.
+    nodeNames sets.String
     // nodeZones is a mapping from Zone to a sets.String of Node's names in the Zone
     // it is updated by the nodeInformer
     nodeZones map[string]sets.String
@@ -342,6 +344,7 @@ func NewCloudWithoutFeatureGates(configReader io.Reader) (*Cloud, error) {
     }

     az := &Cloud{
+        nodeNames:          sets.NewString(),
         nodeZones:          map[string]sets.String{},
         nodeResourceGroups: map[string]string{},
         unmanagedNodes:     sets.NewString(),
@@ -782,6 +785,9 @@ func (az *Cloud) updateNodeCaches(prevNode, newNode *v1.Node) {
     defer az.nodeCachesLock.Unlock()

     if prevNode != nil {
+        // Remove from nodeNames cache.
+        az.nodeNames.Delete(prevNode.ObjectMeta.Name)
+
         // Remove from nodeZones cache.
         prevZone, ok := prevNode.ObjectMeta.Labels[LabelFailureDomainBetaZone]
         if ok && az.isAvailabilityZone(prevZone) {
@@ -805,6 +811,9 @@ func (az *Cloud) updateNodeCaches(prevNode, newNode *v1.Node) {
     }

     if newNode != nil {
+        // Add to nodeNames cache.
+        az.nodeNames.Insert(newNode.ObjectMeta.Name)
+
         // Add to nodeZones cache.
         newZone, ok := newNode.ObjectMeta.Labels[LabelFailureDomainBetaZone]
         if ok && az.isAvailabilityZone(newZone) {
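Context for the two hunks above: nodeNames is kept current by the provider's node informer, which routes node add/update/delete events into updateNodeCaches (the wiring itself is outside this diff). The sketch below shows that general client-go pattern against a fake clientset; updateCache and the event plumbing here are illustrative stand-ins, not the provider's actual code.

package main

import (
	"context"
	"fmt"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes/fake"
	"k8s.io/client-go/tools/cache"
)

func main() {
	client := fake.NewSimpleClientset()
	factory := informers.NewSharedInformerFactory(client, 0)
	nodeInformer := factory.Core().V1().Nodes().Informer()

	// updateCache stands in for az.updateNodeCaches(prevNode, newNode).
	updateCache := func(prevNode, newNode *v1.Node) {
		fmt.Printf("prev=%v new=%v\n", name(prevNode), name(newNode))
	}

	nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			updateCache(nil, obj.(*v1.Node))
		},
		UpdateFunc: func(oldObj, newObj interface{}) {
			updateCache(oldObj.(*v1.Node), newObj.(*v1.Node))
		},
		DeleteFunc: func(obj interface{}) {
			if node, ok := obj.(*v1.Node); ok {
				updateCache(node, nil)
			}
		},
	})

	stop := make(chan struct{})
	defer close(stop)
	factory.Start(stop)
	cache.WaitForCacheSync(stop, nodeInformer.HasSynced)

	// Creating a node fires AddFunc, which in the provider inserts it into nodeNames.
	_, _ = client.CoreV1().Nodes().Create(context.TODO(),
		&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}, metav1.CreateOptions{})
	time.Sleep(100 * time.Millisecond) // give the informer time to deliver the event
}

func name(n *v1.Node) string {
	if n == nil {
		return "<nil>"
	}
	return n.Name
}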
@@ -876,6 +885,22 @@ func (az *Cloud) GetNodeResourceGroup(nodeName string) (string, error) {
     return az.ResourceGroup, nil
 }

+// GetNodeNames returns a set of all node names in the k8s cluster.
+func (az *Cloud) GetNodeNames() (sets.String, error) {
+    // Kubelet won't set az.nodeInformerSynced, return nil.
+    if az.nodeInformerSynced == nil {
+        return nil, nil
+    }
+
+    az.nodeCachesLock.RLock()
+    defer az.nodeCachesLock.RUnlock()
+    if !az.nodeInformerSynced() {
+        return nil, fmt.Errorf("node informer is not synced when trying to GetNodeNames")
+    }
+
+    return sets.NewString(az.nodeNames.List()...), nil
+}
+
 // GetResourceGroups returns a set of resource groups that all nodes are running on.
 func (az *Cloud) GetResourceGroups() (sets.String, error) {
     // Kubelet won't set az.nodeInformerSynced, always return configured resourceGroup.
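GetNodeNames takes the read lock and returns a fresh set built from az.nodeNames.List(), so callers get a point-in-time copy they can use without holding nodeCachesLock or racing with informer updates (and kubelet, which has no node informer, simply gets nil). A minimal stand-alone sketch of that copy-on-read idea, with illustrative type names rather than the provider's Cloud struct:

package main

import (
	"fmt"
	"sync"

	"k8s.io/apimachinery/pkg/util/sets"
)

// nodeNameCache is an illustrative stand-in for the nodeNames/nodeCachesLock
// pair on the Cloud struct.
type nodeNameCache struct {
	mu    sync.RWMutex
	names sets.String
}

// Snapshot mirrors what GetNodeNames does with
// sets.NewString(az.nodeNames.List()...): it returns a copy, not the cache itself.
func (c *nodeNameCache) Snapshot() sets.String {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return sets.NewString(c.names.List()...)
}

func main() {
	c := &nodeNameCache{names: sets.NewString("node-1")}
	snap := c.Snapshot()
	snap.Insert("node-2") // mutating the snapshot does not touch the cache
	fmt.Println(c.names.List(), snap.List())
}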
@@ -3244,6 +3244,7 @@ func TestUpdateNodeCaches(t *testing.T) {
     az.nodeZones = map[string]sets.String{zone: nodesInZone}
     az.nodeResourceGroups = map[string]string{"prevNode": "rg"}
     az.unmanagedNodes = sets.NewString("prevNode")
+    az.nodeNames = sets.NewString("prevNode")

     prevNode := v1.Node{
         ObjectMeta: metav1.ObjectMeta{
@@ -3260,6 +3261,7 @@ func TestUpdateNodeCaches(t *testing.T) {
     assert.Equal(t, 0, len(az.nodeZones[zone]))
     assert.Equal(t, 0, len(az.nodeResourceGroups))
     assert.Equal(t, 0, len(az.unmanagedNodes))
+    assert.Equal(t, 0, len(az.nodeNames))

     newNode := v1.Node{
         ObjectMeta: metav1.ObjectMeta{
@@ -3276,6 +3278,7 @@ func TestUpdateNodeCaches(t *testing.T) {
     assert.Equal(t, 1, len(az.nodeZones[zone]))
     assert.Equal(t, 1, len(az.nodeResourceGroups))
     assert.Equal(t, 1, len(az.unmanagedNodes))
+    assert.Equal(t, 1, len(az.nodeNames))
 }

 func TestGetActiveZones(t *testing.T) {
@@ -74,7 +74,8 @@ type scaleSet struct {
     *Cloud

     // availabilitySet is also required for scaleSet because some instances
-    // (e.g. master nodes) may not belong to any scale sets.
+    // (e.g. control plane nodes) may not belong to any scale sets.
+    // this also allows for clusters with both VM and VMSS nodes.
     availabilitySet VMSet

     vmssCache *azcache.TimedCache
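The comment change above is about mixed clusters: the scaleSet VMSet keeps an availabilitySet implementation around so that calls for nodes which are not VMSS instances can be delegated to it, and isNodeManagedByAvailabilitySet (changed later in this diff) is what decides the route. A rough, self-contained sketch of that dispatch; the interface and names below are illustrative, not the provider's real VMSet interface:

package main

import "fmt"

// vmSet is a trimmed-down, hypothetical version of the provider's VMSet interface.
type vmSet interface {
	GetInstanceID(nodeName string) (string, error)
}

type availabilitySetBackend struct{}

func (availabilitySetBackend) GetInstanceID(nodeName string) (string, error) {
	return "availability-set-id/" + nodeName, nil
}

// scaleSetBackend mirrors the scaleSet shape: it embeds an availability-set
// fallback and a predicate like isNodeManagedByAvailabilitySet.
type scaleSetBackend struct {
	availabilitySet vmSet
	managedByAS     func(nodeName string) (bool, error)
}

func (ss scaleSetBackend) GetInstanceID(nodeName string) (string, error) {
	managed, err := ss.managedByAS(nodeName)
	if err != nil {
		return "", err
	}
	if managed {
		// Not a scale-set VM (e.g. a control plane node): delegate.
		return ss.availabilitySet.GetInstanceID(nodeName)
	}
	return "vmss-id/" + nodeName, nil
}

func main() {
	ss := scaleSetBackend{
		availabilitySet: availabilitySetBackend{},
		managedByAS:     func(n string) (bool, error) { return n == "control-plane-0", nil },
	}
	fmt.Println(ss.GetInstanceID("control-plane-0")) // routed to the availability-set backend
	fmt.Println(ss.GetInstanceID("vmss000001"))      // handled as a scale-set VM
}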
@@ -58,6 +58,11 @@ type vmssEntry struct {
     lastUpdate time.Time
 }

+type availabilitySetEntry struct {
+    vmNames   sets.String
+    nodeNames sets.String
+}
+
 func (ss *scaleSet) newVMSSCache() (*azcache.TimedCache, error) {
     getter := func(key string) (interface{}, error) {
         localCache := &sync.Map{} // [vmssName]*vmssEntry
@@ -273,7 +278,7 @@ func (ss *scaleSet) deleteCacheForNode(nodeName string) error {

 func (ss *scaleSet) newAvailabilitySetNodesCache() (*azcache.TimedCache, error) {
     getter := func(key string) (interface{}, error) {
-        localCache := sets.NewString()
+        vmNames := sets.NewString()
         resourceGroups, err := ss.GetResourceGroups()
         if err != nil {
             return nil, err
@@ -287,11 +292,22 @@ func (ss *scaleSet) newAvailabilitySetNodesCache() (*azcache.TimedCache, error)

         for _, vm := range vmList {
             if vm.Name != nil {
-                vmNames := localCache
-                localCache.Insert(*vm.Name)
+                vmNames.Insert(*vm.Name)
             }
         }
     }

+    // store all the node names in the cluster when the cache data was created.
+    nodeNames, err := ss.GetNodeNames()
+    if err != nil {
+        return nil, err
+    }
+
+    localCache := availabilitySetEntry{
+        vmNames:   vmNames,
+        nodeNames: nodeNames,
+    }
+
     return localCache, nil
 }

@@ -313,6 +329,16 @@ func (ss *scaleSet) isNodeManagedByAvailabilitySet(nodeName string, crt azcache.
         return false, err
     }

-    availabilitySetNodes := cached.(sets.String)
-    return availabilitySetNodes.Has(nodeName), nil
+    cachedNodes := cached.(availabilitySetEntry).nodeNames
+    // if the node is not in the cache, assume the node has joined after the last cache refresh and attempt to refresh the cache.
+    if !cachedNodes.Has(nodeName) {
+        klog.V(2).Infof("Node %s has joined the cluster since the last VM cache refresh, refreshing the cache", nodeName)
+        cached, err = ss.availabilitySetNodesCache.Get(availabilitySetNodesKey, azcache.CacheReadTypeForceRefresh)
+        if err != nil {
+            return false, err
+        }
+    }
+
+    cachedVMs := cached.(availabilitySetEntry).vmNames
+    return cachedVMs.Has(nodeName), nil
 }
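The net effect of the last three hunks is the pattern the PR title describes: the availability-set-nodes cache now remembers which cluster nodes it knew about when it was built, and a lookup for an unknown node forces one refresh before deciding. The self-contained sketch below reproduces that control flow; snapshotCache and its rebuild hook are illustrative, not the provider's azcache.TimedCache API.

package main

import (
	"fmt"
	"sync"

	"k8s.io/apimachinery/pkg/util/sets"
)

// snapshot pairs the two sets the new availabilitySetEntry carries.
type snapshot struct {
	vmNames   sets.String // availability-set VM names listed from the cloud API
	nodeNames sets.String // node names known to the cluster when the snapshot was built
}

// snapshotCache is a toy stand-in for a timed cache with a force-refresh read mode.
type snapshotCache struct {
	mu      sync.Mutex
	current *snapshot
	rebuild func() (*snapshot, error)
}

func (c *snapshotCache) get(forceRefresh bool) (*snapshot, error) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.current == nil || forceRefresh {
		s, err := c.rebuild()
		if err != nil {
			return nil, err
		}
		c.current = s
	}
	return c.current, nil
}

// isNodeManagedByAvailabilitySet mirrors the changed method: refresh only when
// the node is not in the cached node-name set, then answer from vmNames.
func isNodeManagedByAvailabilitySet(c *snapshotCache, nodeName string) (bool, error) {
	cached, err := c.get(false)
	if err != nil {
		return false, err
	}
	if !cached.nodeNames.Has(nodeName) {
		// The node joined after the snapshot was built: force one refresh.
		if cached, err = c.get(true); err != nil {
			return false, err
		}
	}
	return cached.vmNames.Has(nodeName), nil
}

func main() {
	rebuilds := 0
	c := &snapshotCache{rebuild: func() (*snapshot, error) {
		rebuilds++
		if rebuilds == 1 {
			// First build: the new node is not visible yet.
			return &snapshot{
				vmNames:   sets.NewString("as-vm-0"),
				nodeNames: sets.NewString("as-vm-0"),
			}, nil
		}
		// After the refresh the node shows up as an availability-set VM.
		return &snapshot{
			vmNames:   sets.NewString("as-vm-0", "as-vm-1"),
			nodeNames: sets.NewString("as-vm-0", "as-vm-1"),
		}, nil
	}}

	managed, _ := isNodeManagedByAvailabilitySet(c, "as-vm-1")
	fmt.Println(managed, rebuilds) // true 2
}

As in the real change, the refresh happens at most once per lookup, and a node that turns out to be a scale-set instance still returns false after the refresh rather than refreshing again.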