mirror of
https://github.com/k3s-io/kubernetes.git
fix: exclude non-ready nodes and deleted nodes from azure load balancers
Make sure that nodes that are not in the ready state and are not newly created (i.e. do not have the "node.cloudprovider.kubernetes.io/uninitialized" taint) are removed from load balancers. Also remove nodes that are being deleted from the cluster.

Signed-off-by: Riccardo Ravaioli <rravaiol@redhat.com>
parent a2adaf75b7
commit 13ba86c3b5
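In short, the patched updateNodeCaches excludes a node from Azure load balancers when any of three conditions holds: the node is not managed by the cloud provider, it carries the exclude-from-load-balancers label, or it is not ready and is not newly created (where "newly created" means it still carries the "node.cloudprovider.kubernetes.io/uninitialized" taint); a node being deleted from the cluster (newNode == nil) is always excluded. The following standalone Go program distills the decision encoded by the switch statement in the updateNodeCaches hunk below; the node struct is a simplified stand-in for *v1.Node and the managedByAzureLabel key is assumed here, for illustration only:

package main

import "fmt"

// node is a stripped-down stand-in for *v1.Node, for illustration only.
type node struct {
	labels  map[string]string
	taints  []string // taint keys
	ready   bool
	deleted bool // models the newNode == nil case in updateNodeCaches
}

const (
	managedByAzureLabel   = "kubernetes.azure.com/managed"                            // assumed key, sketch only
	excludeBalancersLabel = "node.kubernetes.io/exclude-from-external-load-balancers" // v1.LabelNodeExcludeBalancers
	uninitializedTaint    = "node.cloudprovider.kubernetes.io/uninitialized"          // cloudproviderapi.TaintExternalCloudProvider
)

func hasTaint(n node, key string) bool {
	for _, k := range n.taints {
		if k == key {
			return true
		}
	}
	return false
}

// shouldExclude mirrors the decision the patch adds to updateNodeCaches.
func shouldExclude(n node) bool {
	if n.deleted {
		return true // nodes being deleted from the cluster are excluded
	}
	managed, ok := n.labels[managedByAzureLabel]
	isManagedByCloudProvider := !ok || managed != "false"
	_, hasExcludeLabel := n.labels[excludeBalancersLabel]
	switch {
	case !isManagedByCloudProvider:
		return true // unmanaged nodes are excluded
	case hasExcludeLabel:
		return true // explicitly opted out via label
	case !n.ready && !hasTaint(n, uninitializedTaint):
		return true // not ready and not newly created
	default:
		return false // a valid backend
	}
}

func main() {
	initializing := node{taints: []string{uninitializedTaint}} // NotReady but brand new
	dead := node{}                                             // NotReady and initialized
	fmt.Println(shouldExclude(initializing))                   // false: kept in the pool
	fmt.Println(shouldExclude(dead))                           // true: pruned from the pool
}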
@@ -42,6 +42,7 @@ import (
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/client-go/tools/record"
 	cloudprovider "k8s.io/cloud-provider"
+	cloudproviderapi "k8s.io/cloud-provider/api"
 	"k8s.io/klog/v2"
 	"k8s.io/legacy-cloud-providers/azure/auth"
 	azcache "k8s.io/legacy-cloud-providers/azure/cache"
@@ -793,7 +794,7 @@ func (az *Cloud) updateNodeCaches(prevNode, newNode *v1.Node) {
 			}
 		}

-		//Remove from nodeZones cache if using depreciated LabelFailureDomainBetaZone
+		// Remove from nodeZones cache if using deprecated LabelFailureDomainBetaZone
 		prevZoneFailureDomain, ok := prevNode.ObjectMeta.Labels[v1.LabelFailureDomainBetaZone]
 		if ok && az.isAvailabilityZone(prevZoneFailureDomain) {
 			az.nodeZones[prevZone].Delete(prevNode.ObjectMeta.Name)
@@ -808,16 +809,17 @@ func (az *Cloud) updateNodeCaches(prevNode, newNode *v1.Node) {
 			delete(az.nodeResourceGroups, prevNode.ObjectMeta.Name)
 		}

-		// Remove from unmanagedNodes cache.
 		managed, ok := prevNode.ObjectMeta.Labels[managedByAzureLabel]
-		if ok && managed == "false" {
+		isNodeManagedByCloudProvider := !ok || managed != "false"
+
+		// Remove from unmanagedNodes cache
+		if !isNodeManagedByCloudProvider {
 			az.unmanagedNodes.Delete(prevNode.ObjectMeta.Name)
-			az.excludeLoadBalancerNodes.Delete(prevNode.ObjectMeta.Name)
 		}

-		// Remove from excludeLoadBalancerNodes cache.
-		if _, hasExcludeBalancerLabel := prevNode.ObjectMeta.Labels[v1.LabelNodeExcludeBalancers]; hasExcludeBalancerLabel {
-			az.excludeLoadBalancerNodes.Delete(prevNode.ObjectMeta.Name)
+		// if the node is being deleted from the cluster, exclude it from load balancers
+		if newNode == nil {
+			az.excludeLoadBalancerNodes.Insert(prevNode.ObjectMeta.Name)
 		}
 	}

@@ -840,16 +842,35 @@ func (az *Cloud) updateNodeCaches(prevNode, newNode *v1.Node) {
 			az.nodeResourceGroups[newNode.ObjectMeta.Name] = strings.ToLower(newRG)
 		}

-		// Add to unmanagedNodes cache.
+		_, hasExcludeBalancerLabel := newNode.ObjectMeta.Labels[v1.LabelNodeExcludeBalancers]
 		managed, ok := newNode.ObjectMeta.Labels[managedByAzureLabel]
-		if ok && managed == "false" {
+		isNodeManagedByCloudProvider := !ok || managed != "false"
+
+		// Update unmanagedNodes cache
+		if !isNodeManagedByCloudProvider {
 			az.unmanagedNodes.Insert(newNode.ObjectMeta.Name)
-			az.excludeLoadBalancerNodes.Insert(newNode.ObjectMeta.Name)
 		}

-		// Add to excludeLoadBalancerNodes cache.
-		if _, hasExcludeBalancerLabel := newNode.ObjectMeta.Labels[v1.LabelNodeExcludeBalancers]; hasExcludeBalancerLabel {
+		// Update excludeLoadBalancerNodes cache
+		switch {
+		case !isNodeManagedByCloudProvider:
 			az.excludeLoadBalancerNodes.Insert(newNode.ObjectMeta.Name)
+
+		case hasExcludeBalancerLabel:
+			az.excludeLoadBalancerNodes.Insert(newNode.ObjectMeta.Name)
+
+		case !isNodeReady(newNode) && getCloudTaint(newNode.Spec.Taints) == nil:
+			// If not in ready state and not a newly created node, add to excludeLoadBalancerNodes cache.
+			// New nodes (tainted with "node.cloudprovider.kubernetes.io/uninitialized") should not be
+			// excluded from load balancers regardless of their state, so as to reduce the number of
+			// VMSS API calls and not provoke VMScaleSetActiveModelsCountLimitReached.
+			// (https://github.com/kubernetes-sigs/cloud-provider-azure/issues/851)
+			az.excludeLoadBalancerNodes.Insert(newNode.ObjectMeta.Name)
+
+		default:
+			// Nodes not falling into the three cases above are valid backends and
+			// should not appear in excludeLoadBalancerNodes cache.
+			az.excludeLoadBalancerNodes.Delete(newNode.ObjectMeta.Name)
 		}
 	}
 }
@@ -975,3 +996,21 @@ func (az *Cloud) ShouldNodeExcludedFromLoadBalancer(nodeName string) (bool, erro

 	return az.excludeLoadBalancerNodes.Has(nodeName), nil
 }
+
+func isNodeReady(node *v1.Node) bool {
+	for _, cond := range node.Status.Conditions {
+		if cond.Type == v1.NodeReady && cond.Status == v1.ConditionTrue {
+			return true
+		}
+	}
+	return false
+}
+
+func getCloudTaint(taints []v1.Taint) *v1.Taint {
+	for _, taint := range taints {
+		if taint.Key == cloudproviderapi.TaintExternalCloudProvider {
+			return &taint
+		}
+	}
+	return nil
+}
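As a sanity check, here is a small self-contained program exercising the two new helpers on a node that is NotReady but still carries the uninitialized taint. The helper bodies are copied from the hunk above (they are unexported in the azure package, so the example re-declares them); the import paths are the standard k8s.io modules:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	cloudproviderapi "k8s.io/cloud-provider/api"
)

// Local copies of the two unexported helpers added by this commit.
func isNodeReady(node *v1.Node) bool {
	for _, cond := range node.Status.Conditions {
		if cond.Type == v1.NodeReady && cond.Status == v1.ConditionTrue {
			return true
		}
	}
	return false
}

func getCloudTaint(taints []v1.Taint) *v1.Taint {
	for _, taint := range taints {
		if taint.Key == cloudproviderapi.TaintExternalCloudProvider {
			return &taint
		}
	}
	return nil
}

func main() {
	// A node that is still initializing: NotReady, but carrying the
	// "node.cloudprovider.kubernetes.io/uninitialized" taint.
	n := &v1.Node{
		Spec: v1.NodeSpec{
			Taints: []v1.Taint{{
				Key:    cloudproviderapi.TaintExternalCloudProvider,
				Effect: v1.TaintEffectNoSchedule,
			}},
		},
		Status: v1.NodeStatus{
			Conditions: []v1.NodeCondition{
				{Type: v1.NodeReady, Status: v1.ConditionFalse},
			},
		},
	}
	excluded := !isNodeReady(n) && getCloudTaint(n.Spec.Taints) == nil
	fmt.Println(excluded) // false: the taint marks the node as newly created, so it stays in the pool
}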
@@ -1125,11 +1125,8 @@ func (az *Cloud) reconcileLoadBalancer(clusterName string, service *v1.Service,
 			if err != nil && !errors.Is(err, cloudprovider.InstanceNotFound) {
 				return nil, err
 			}
-
-			// If a node is not supposed to be included in the LB, it
-			// would not be in the `nodes` slice. We need to check the nodes that
-			// have been added to the LB's backendpool, find the unwanted ones and
-			// delete them from the pool.
+			// If the node appears in the local cache of nodes to exclude,
+			// delete it from the load balancer backend pool.
 			shouldExcludeLoadBalancer, err := az.ShouldNodeExcludedFromLoadBalancer(nodeName)
 			if err != nil {
 				klog.Errorf("ShouldNodeExcludedFromLoadBalancer(%s) failed with error: %v", nodeName, err)
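The pattern this hunk relies on is a backend-pool walk that consults the exclusion cache. A minimal runnable sketch, with a fake Cloud standing in for the real one; prunePool, fakeCloud, and the ip-to-node map are hypothetical stand-ins, and only the method name ShouldNodeExcludedFromLoadBalancer matches the code above:

package main

import "fmt"

// excluder models the one real method this hunk calls:
// (*Cloud).ShouldNodeExcludedFromLoadBalancer.
type excluder interface {
	ShouldNodeExcludedFromLoadBalancer(nodeName string) (bool, error)
}

// fakeCloud is a hypothetical stand-in for the Azure Cloud object.
type fakeCloud struct{ excluded map[string]bool }

func (f *fakeCloud) ShouldNodeExcludedFromLoadBalancer(name string) (bool, error) {
	return f.excluded[name], nil
}

// prunePool drops backends whose node is in the exclusion cache.
// pool maps backend IP -> node name (a simplified model of the LB backend pool).
func prunePool(az excluder, pool map[string]string) {
	for ip, nodeName := range pool {
		shouldExclude, err := az.ShouldNodeExcludedFromLoadBalancer(nodeName)
		if err != nil {
			fmt.Printf("ShouldNodeExcludedFromLoadBalancer(%s) failed: %v\n", nodeName, err)
			continue
		}
		if shouldExclude {
			delete(pool, ip) // remove the unwanted backend from the pool
		}
	}
}

func main() {
	az := &fakeCloud{excluded: map[string]bool{"deadNode": true}}
	pool := map[string]string{"10.0.0.4": "goodNode", "10.0.0.5": "deadNode"}
	prunePool(az, pool)
	fmt.Println(pool) // map[10.0.0.4:goodNode]
}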
@@ -40,6 +40,7 @@ import (
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/client-go/informers"
 	cloudprovider "k8s.io/cloud-provider"
+	cloudproviderapi "k8s.io/cloud-provider/api"
 	servicehelpers "k8s.io/cloud-provider/service/helpers"
 	"k8s.io/legacy-cloud-providers/azure/auth"
 	"k8s.io/legacy-cloud-providers/azure/clients/interfaceclient/mockinterfaceclient"
@@ -3228,7 +3229,7 @@ func TestUpdateNodeCaches(t *testing.T) {
 	ctrl := gomock.NewController(t)
 	defer ctrl.Finish()
 	az := GetTestCloud(ctrl)
-
+	// delete node appearing in unmanagedNodes and excludeLoadBalancerNodes
 	zone := fmt.Sprintf("%s-0", az.Location)
 	nodesInZone := sets.NewString("prevNode")
 	az.nodeZones = map[string]sets.String{zone: nodesInZone}
@@ -3248,14 +3249,16 @@ func TestUpdateNodeCaches(t *testing.T) {
 			Name: "prevNode",
 		},
 	}
-
+	// node is deleted from the cluster
 	az.updateNodeCaches(&prevNode, nil)
 	assert.Equal(t, 0, len(az.nodeZones[zone]))
 	assert.Equal(t, 0, len(az.nodeResourceGroups))
 	assert.Equal(t, 0, len(az.unmanagedNodes))
-	assert.Equal(t, 0, len(az.excludeLoadBalancerNodes))
+	// deleted node should be excluded from load balancer
+	assert.Equal(t, 1, len(az.excludeLoadBalancerNodes))
 	assert.Equal(t, 0, len(az.nodeNames))

 	// add new (unmanaged and to-be-excluded) node
 	newNode := v1.Node{
 		ObjectMeta: metav1.ObjectMeta{
 			Labels: map[string]string{
@@ -3267,12 +3270,12 @@ func TestUpdateNodeCaches(t *testing.T) {
 			Name: "newNode",
 		},
 	}
-
+	// new node is added to the cluster
 	az.updateNodeCaches(nil, &newNode)
 	assert.Equal(t, 1, len(az.nodeZones[zone]))
 	assert.Equal(t, 1, len(az.nodeResourceGroups))
 	assert.Equal(t, 1, len(az.unmanagedNodes))
-	assert.Equal(t, 1, len(az.excludeLoadBalancerNodes))
+	assert.Equal(t, 2, len(az.excludeLoadBalancerNodes))
 	assert.Equal(t, 1, len(az.nodeNames))
 }

@@ -3310,6 +3313,75 @@ func TestUpdateNodeCacheExcludeLoadBalancer(t *testing.T) {
 	az.updateNodeCaches(&prevNode, &newNode)
 	assert.Equal(t, 1, len(az.excludeLoadBalancerNodes))

+	// a non-ready node should be excluded
+	az.unmanagedNodes = sets.NewString()
+	az.excludeLoadBalancerNodes = sets.NewString()
+	az.nodeNames = sets.NewString()
+	nonReadyNode := v1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Labels: map[string]string{
+				v1.LabelTopologyZone: zone,
+			},
+			Name: "aNode",
+		},
+		Status: v1.NodeStatus{
+			Conditions: []v1.NodeCondition{
+				{
+					Type:   v1.NodeReady,
+					Status: v1.ConditionFalse,
+				},
+			},
+		},
+	}
+	az.updateNodeCaches(nil, &nonReadyNode)
+	assert.Equal(t, 1, len(az.excludeLoadBalancerNodes))
+
+	// node becomes ready, it should be removed from az.excludeLoadBalancerNodes
+	readyNode := v1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Labels: map[string]string{
+				v1.LabelTopologyZone: zone,
+			},
+			Name: "aNode",
+		},
+		Status: v1.NodeStatus{
+			Conditions: []v1.NodeCondition{
+				{
+					Type:   v1.NodeReady,
+					Status: v1.ConditionTrue,
+				},
+			},
+		},
+	}
+	az.updateNodeCaches(&nonReadyNode, &readyNode)
+	assert.Equal(t, 0, len(az.excludeLoadBalancerNodes))
+
+	// new non-ready node with taint is added to the cluster: don't exclude it
+	nonReadyTaintedNode := v1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Labels: map[string]string{
+				v1.LabelTopologyZone: zone,
+			},
+			Name: "anotherNode",
+		},
+		Spec: v1.NodeSpec{
+			Taints: []v1.Taint{{
+				Key:    cloudproviderapi.TaintExternalCloudProvider,
+				Value:  "aValue",
+				Effect: v1.TaintEffectNoSchedule},
+			},
+		},
+		Status: v1.NodeStatus{
+			Conditions: []v1.NodeCondition{
+				{
+					Type:   v1.NodeReady,
+					Status: v1.ConditionFalse,
+				},
+			},
+		},
+	}
+	az.updateNodeCaches(nil, &nonReadyTaintedNode)
+	assert.Equal(t, 0, len(az.excludeLoadBalancerNodes))
 }

 func TestGetActiveZones(t *testing.T) {
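To run just the tests touched by this change from a kubernetes checkout (a sketch; the staging module layout is assumed from the import paths above):

cd staging/src/k8s.io/legacy-cloud-providers
go test -run 'TestUpdateNodeCache' ./azure/...

The -run pattern matches both TestUpdateNodeCaches and TestUpdateNodeCacheExcludeLoadBalancer.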