From c6f6972958061a888a482e8559c83dfd4cabcdcf Mon Sep 17 00:00:00 2001 From: wojtekt Date: Mon, 6 Apr 2020 20:04:44 +0200 Subject: [PATCH] Fix GCE ILB for large clusters --- .../gce/gce_loadbalancer_internal.go | 13 +++++++++++ .../gce/gce_loadbalancer_internal_test.go | 23 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal.go b/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal.go index 86290a963e3..0aaea68b4f2 100644 --- a/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal.go +++ b/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal.go @@ -45,6 +45,8 @@ const ( ILBFinalizerV1 = "gke.networking.io/l4-ilb-v1" // ILBFinalizerV2 is the finalizer used by newer controllers that implement Internal LoadBalancer services. ILBFinalizerV2 = "gke.networking.io/l4-ilb-v2" + // maxInstancesPerInstanceGroup defines the maximum number of VMs per InstanceGroup. + maxInstancesPerInstanceGroup = 1000 ) func (g *Cloud) ensureInternalLoadBalancer(clusterName, clusterID string, svc *v1.Service, existingFwdRule *compute.ForwardingRule, nodes []*v1.Node) (*v1.LoadBalancerStatus, error) { @@ -512,6 +514,17 @@ func (g *Cloud) ensureInternalInstanceGroup(name, zone string, nodes []*v1.Node) kubeNodes.Insert(n.Name) } + // An individual InstanceGroup has a limit of 1000 instances. + // As a result, it's not possible to add more to it. + // Given that the long-term fix (AlphaFeatureILBSubsets) is already in progress, + // to stop the bleeding we now simply cut down the contents to the first 1000 + // instances in alphabetical order. Since there is a limit of + // 250 backend VMs for ILB, this isn't making things worse. 
+ if len(kubeNodes) > maxInstancesPerInstanceGroup { + klog.Warningf("Limiting number of VMs for InstanceGroup %s to %d", name, maxInstancesPerInstanceGroup) + kubeNodes = sets.NewString(kubeNodes.List()[:maxInstancesPerInstanceGroup]...) + } + gceNodes := sets.NewString() if ig == nil { klog.V(2).Infof("ensureInternalInstanceGroup(%v, %v): creating instance group", name, zone) diff --git a/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal_test.go b/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal_test.go index a8a7b1a321c..fd85cc3f72a 100644 --- a/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal_test.go +++ b/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal_test.go @@ -148,6 +148,29 @@ func TestEnsureInternalBackendServiceGroups(t *testing.T) { } } +func TestEnsureInternalInstanceGroupsLimit(t *testing.T) { + t.Parallel() + + vals := DefaultTestClusterValues() + nodeNames := []string{} + for i := 0; i < maxInstancesPerInstanceGroup+5; i++ { + nodeNames = append(nodeNames, fmt.Sprintf("node-%d", i)) + } + + gce, err := fakeGCECloud(vals) + require.NoError(t, err) + + nodes, err := createAndInsertNodes(gce, nodeNames, vals.ZoneName) + require.NoError(t, err) + igName := makeInstanceGroupName(vals.ClusterID) + _, err = gce.ensureInternalInstanceGroups(igName, nodes) + require.NoError(t, err) + + instances, err := gce.ListInstancesInInstanceGroup(igName, vals.ZoneName, allInstances) + require.NoError(t, err) + assert.Equal(t, maxInstancesPerInstanceGroup, len(instances)) +} + func TestEnsureInternalLoadBalancer(t *testing.T) { t.Parallel()