Fix GCE ILB for large clusters

This commit is contained in:
wojtekt 2020-04-06 20:04:44 +02:00
parent c158001bbc
commit c6f6972958
2 changed files with 36 additions and 0 deletions

View File

@ -45,6 +45,8 @@ const (
ILBFinalizerV1 = "gke.networking.io/l4-ilb-v1"
// ILBFinalizerV2 is the finalizer used by newer controllers that implement Internal LoadBalancer services.
ILBFinalizerV2 = "gke.networking.io/l4-ilb-v2"
// maxInstancesPerInstanceGroup defines maximum number of VMs per InstanceGroup.
maxInstancesPerInstanceGroup = 1000
)
func (g *Cloud) ensureInternalLoadBalancer(clusterName, clusterID string, svc *v1.Service, existingFwdRule *compute.ForwardingRule, nodes []*v1.Node) (*v1.LoadBalancerStatus, error) {
@ -512,6 +514,17 @@ func (g *Cloud) ensureInternalInstanceGroup(name, zone string, nodes []*v1.Node)
kubeNodes.Insert(n.Name)
}
// An individual InstanceGroup is limited to 1000 instances, so it is
// not possible to add more to it once that limit is reached.
// Given that the long-term fix (AlphaFeatureILBSubsets) is already in progress,
// to stop the bleeding we now simply cut the contents down to the first 1000
// instances in alphabetical order. Since ILB is already limited to
// 250 backend VMs, this doesn't make things worse.
if len(kubeNodes) > maxInstancesPerInstanceGroup {
klog.Warningf("Limiting number of VMs for InstanceGroup %s to %d", name, maxInstancesPerInstanceGroup)
kubeNodes = sets.NewString(kubeNodes.List()[:maxInstancesPerInstanceGroup]...)
}
gceNodes := sets.NewString()
if ig == nil {
klog.V(2).Infof("ensureInternalInstanceGroup(%v, %v): creating instance group", name, zone)

View File

@ -148,6 +148,29 @@ func TestEnsureInternalBackendServiceGroups(t *testing.T) {
}
}
// TestEnsureInternalInstanceGroupsLimit registers more nodes than a single
// instance group may hold and checks that the resulting group contains
// exactly maxInstancesPerInstanceGroup instances.
func TestEnsureInternalInstanceGroupsLimit(t *testing.T) {
	t.Parallel()

	vals := DefaultTestClusterValues()

	// Build a node list that overshoots the per-group limit by five entries.
	names := make([]string, maxInstancesPerInstanceGroup+5)
	for idx := range names {
		names[idx] = fmt.Sprintf("node-%d", idx)
	}

	gce, err := fakeGCECloud(vals)
	require.NoError(t, err)

	nodes, err := createAndInsertNodes(gce, names, vals.ZoneName)
	require.NoError(t, err)

	groupName := makeInstanceGroupName(vals.ClusterID)
	_, err = gce.ensureInternalInstanceGroups(groupName, nodes)
	require.NoError(t, err)

	// Whatever was requested, the group must hold no more than the cap.
	members, err := gce.ListInstancesInInstanceGroup(groupName, vals.ZoneName, allInstances)
	require.NoError(t, err)
	assert.Equal(t, maxInstancesPerInstanceGroup, len(members))
}
func TestEnsureInternalLoadBalancer(t *testing.T) {
t.Parallel()