From dd4dae4a57129c40d4496ca6ad37ee3a4fa045fa Mon Sep 17 00:00:00 2001 From: Zach Loafman Date: Mon, 20 Jun 2016 17:37:49 -0700 Subject: [PATCH] GCE provider: Limit Filter calls to regexps rather than insane blobs Filters can't exceed 4k, and GET requests against the GCE API are also limited, so these break down in different ways at different cluster counts. Fix it by introducing an advisory node-instance-prefix configuration in the GCE provider that can hint the EnsureLoadBalancer/UpdateLoadBalancer code (and the firewall creation/update code). If it's not there, or wrong (a hostname that's registered violates it), just ignore it and grab the whole project. --- cluster/gce/configure-vm.sh | 1 + cluster/gce/gci/configure-helper.sh | 1 + cluster/gce/trusty/configure-helper.sh | 1 + pkg/cloudprovider/providers/gce/gce.go | 78 ++++++++++++++++++-------- test/e2e/e2e.go | 2 +- 5 files changed, 58 insertions(+), 25 deletions(-) diff --git a/cluster/gce/configure-vm.sh b/cluster/gce/configure-vm.sh index d5cc7c9e136..cca579d2e64 100755 --- a/cluster/gce/configure-vm.sh +++ b/cluster/gce/configure-vm.sh @@ -793,6 +793,7 @@ EOF if [[ -n "${NODE_INSTANCE_PREFIX:-}" ]]; then cat <>/etc/gce.conf node-tags = ${NODE_INSTANCE_PREFIX} +node-instance-prefix = ${NODE_INSTANCE_PREFIX} EOF CLOUD_CONFIG=/etc/gce.conf fi diff --git a/cluster/gce/gci/configure-helper.sh b/cluster/gce/gci/configure-helper.sh index 7ec604cfe2f..177e0975907 100644 --- a/cluster/gce/gci/configure-helper.sh +++ b/cluster/gce/gci/configure-helper.sh @@ -191,6 +191,7 @@ EOF use_cloud_config="true" cat <>/etc/gce.conf node-tags = ${NODE_INSTANCE_PREFIX} +node-instance-prefix = ${NODE_INSTANCE_PREFIX} EOF fi if [[ -n "${MULTIZONE:-}" ]]; then diff --git a/cluster/gce/trusty/configure-helper.sh b/cluster/gce/trusty/configure-helper.sh index 432fb269809..e43de66e756 100644 --- a/cluster/gce/trusty/configure-helper.sh +++ b/cluster/gce/trusty/configure-helper.sh @@ -334,6 +334,7 @@ EOF use_cloud_config="true" cat <>/etc/gce.conf node-tags = ${NODE_INSTANCE_PREFIX} +node-instance-prefix = ${NODE_INSTANCE_PREFIX} EOF fi if [ -n "${MULTIZONE:-}" ]; then diff --git a/pkg/cloudprovider/providers/gce/gce.go b/pkg/cloudprovider/providers/gce/gce.go index 8649058cdd1..d2cf415b54e 100644 --- a/pkg/cloudprovider/providers/gce/gce.go +++ b/pkg/cloudprovider/providers/gce/gce.go @@ -85,18 +85,20 @@ type GCECloud struct { managedZones []string // List of zones we are spanning (for Ubernetes-Lite, primarily when running on master) networkURL string nodeTags []string // List of tags to use on firewall rules for load balancers + nodeInstancePrefix string // If non-"", an advisory prefix for all nodes in the cluster useMetadataServer bool operationPollRateLimiter flowcontrol.RateLimiter } type Config struct { Global struct { - TokenURL string `gcfg:"token-url"` - TokenBody string `gcfg:"token-body"` - ProjectID string `gcfg:"project-id"` - NetworkName string `gcfg:"network-name"` - NodeTags []string `gcfg:"node-tags"` - Multizone bool `gcfg:"multizone"` + TokenURL string `gcfg:"token-url"` + TokenBody string `gcfg:"token-body"` + ProjectID string `gcfg:"project-id"` + NetworkName string `gcfg:"network-name"` + NodeTags []string `gcfg:"node-tags"` + NodeInstancePrefix string `gcfg:"node-instance-prefix"` + Multizone bool `gcfg:"multizone"` } } @@ -260,6 +262,7 @@ func newGCECloud(config io.Reader) (*GCECloud, error) { tokenSource := google.ComputeTokenSource("") var nodeTags []string + var nodeInstancePrefix string if config != nil { var cfg Config if err := gcfg.ReadInto(&cfg, config); err != nil { @@ -281,19 +284,20 @@ func newGCECloud(config io.Reader) (*GCECloud, error) { tokenSource = NewAltTokenSource(cfg.Global.TokenURL, cfg.Global.TokenBody) } nodeTags = cfg.Global.NodeTags + nodeInstancePrefix = cfg.Global.NodeInstancePrefix if cfg.Global.Multizone { managedZones = nil // Use all zones in region } } - return CreateGCECloud(projectID, region, zone, managedZones, networkURL, nodeTags, tokenSource, true /* useMetadataServer */) + return CreateGCECloud(projectID, region, zone, managedZones, networkURL, nodeTags, nodeInstancePrefix, tokenSource, true /* useMetadataServer */) } // Creates a GCECloud object using the specified parameters. // If no networkUrl is specified, loads networkName via rest call. // If no tokenSource is specified, uses oauth2.DefaultTokenSource. // If managedZones is nil / empty all zones in the region will be managed. -func CreateGCECloud(projectID, region, zone string, managedZones []string, networkURL string, nodeTags []string, tokenSource oauth2.TokenSource, useMetadataServer bool) (*GCECloud, error) { +func CreateGCECloud(projectID, region, zone string, managedZones []string, networkURL string, nodeTags []string, nodeInstancePrefix string, tokenSource oauth2.TokenSource, useMetadataServer bool) (*GCECloud, error) { if tokenSource == nil { var err error tokenSource, err = google.DefaultTokenSource( @@ -348,6 +352,7 @@ func CreateGCECloud(projectID, region, zone string, managedZones []string, netwo managedZones: managedZones, networkURL: networkURL, nodeTags: nodeTags, + nodeInstancePrefix: nodeInstancePrefix, useMetadataServer: useMetadataServer, operationPollRateLimiter: operationPollRateLimiter, }, nil @@ -1017,9 +1022,20 @@ func (gce *GCECloud) firewallObject(name, region, desc string, sourceRanges nets // is unspecified func (gce *GCECloud) computeHostTags(hosts []*gceInstance) ([]string, error) { // TODO: We could store the tags in gceInstance, so we could have already fetched it - hostNamesByZone := make(map[string][]string) + hostNamesByZone := make(map[string]map[string]bool) // map of zones -> map of names -> bool (for easy lookup) + nodeInstancePrefix := gce.nodeInstancePrefix for _, host := range hosts { - hostNamesByZone[host.Zone] = append(hostNamesByZone[host.Zone], host.Name) + if !strings.HasPrefix(host.Name, gce.nodeInstancePrefix) { + glog.Warningf("instance '%s' does not conform to prefix '%s', ignoring filter", host, gce.nodeInstancePrefix) + nodeInstancePrefix = "" + } + + z, ok := hostNamesByZone[host.Zone] + if !ok { + z = make(map[string]bool) + hostNamesByZone[host.Zone] = z + } + z[host.Name] = true } tags := sets.NewString() @@ -1030,11 +1046,14 @@ func (gce *GCECloud) computeHostTags(hosts []*gceInstance) ([]string, error) { for ; page == 0 || (pageToken != "" && page < maxPages); page++ { listCall := gce.service.Instances.List(gce.projectID, zone) - // Add the filter for hosts - listCall = listCall.Filter("name eq (" + strings.Join(hostNames, "|") + ")") + if nodeInstancePrefix != "" { + // Add the filter for hosts + listCall = listCall.Filter("name eq " + nodeInstancePrefix + ".*") + } // Add the fields we want - listCall = listCall.Fields("items(name,tags)") + // TODO(zmerlynn): Internal bug 29524655 + // listCall = listCall.Fields("items(name,tags)") if pageToken != "" { listCall = listCall.PageToken(pageToken) @@ -1046,6 +1065,10 @@ func (gce *GCECloud) computeHostTags(hosts []*gceInstance) ([]string, error) { } pageToken = res.NextPageToken for _, instance := range res.Items { + if !hostNames[instance.Name] { + continue + } + longest_tag := "" for _, tag := range instance.Tags.Items { if strings.HasPrefix(instance.Name, tag) && len(tag) > len(longest_tag) { @@ -2429,21 +2452,20 @@ type gceDisk struct { // Gets the named instances, returning cloudprovider.InstanceNotFound if any instance is not found func (gce *GCECloud) getInstancesByNames(names []string) ([]*gceInstance, error) { instances := make(map[string]*gceInstance) + remaining := len(names) + nodeInstancePrefix := gce.nodeInstancePrefix for _, name := range names { name = canonicalizeInstanceName(name) + if !strings.HasPrefix(name, gce.nodeInstancePrefix) { + glog.Warningf("instance '%s' does not conform to prefix '%s', removing filter", name, gce.nodeInstancePrefix) + nodeInstancePrefix = "" + } instances[name] = nil } for _, zone := range gce.managedZones { - var remaining []string - for name, instance := range instances { - if instance == nil { - remaining = append(remaining, name) - } - } - - if len(remaining) == 0 { + if remaining == 0 { break } @@ -2452,10 +2474,13 @@ func (gce *GCECloud) getInstancesByNames(names []string) ([]*gceInstance, error) for ; page == 0 || (pageToken != "" && page < maxPages); page++ { listCall := gce.service.Instances.List(gce.projectID, zone) - // Add the filter for hosts - listCall = listCall.Filter("name eq (" + strings.Join(remaining, "|") + ")") + if nodeInstancePrefix != "" { + // Add the filter for hosts + listCall = listCall.Filter("name eq " + nodeInstancePrefix + ".*") + } - listCall = listCall.Fields("items(name,id,disks,machineType)") + // TODO(zmerlynn): Internal bug 29524655 + // listCall = listCall.Fields("items(name,id,disks,machineType)") if pageToken != "" { listCall.PageToken(pageToken) } @@ -2467,6 +2492,10 @@ func (gce *GCECloud) getInstancesByNames(names []string) ([]*gceInstance, error) pageToken = res.NextPageToken for _, i := range res.Items { name := i.Name + if _, ok := instances[name]; !ok { + continue + } + instance := &gceInstance{ Zone: zone, Name: name, @@ -2475,6 +2504,7 @@ func (gce *GCECloud) getInstancesByNames(names []string) ([]*gceInstance, error) Type: lastComponent(i.MachineType), } instances[name] = instance + remaining-- } } if page >= maxPages { diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index 82e2009f387..366a6ccaa8e 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -73,7 +73,7 @@ func setupProviderConfig() error { return fmt.Errorf("error parsing GCE/GKE region from zone %q: %v", zone, err) } managedZones := []string{zone} // Only single-zone for now - cloudConfig.Provider, err = gcecloud.CreateGCECloud(framework.TestContext.CloudConfig.ProjectID, region, zone, managedZones, "" /* networkUrl */, nil /* nodeTags */, tokenSource, false /* useMetadataServer */) + cloudConfig.Provider, err = gcecloud.CreateGCECloud(framework.TestContext.CloudConfig.ProjectID, region, zone, managedZones, "" /* networkUrl */, nil /* nodeTags */, "" /* nodeInstancePerfix */, tokenSource, false /* useMetadataServer */) if err != nil { return fmt.Errorf("Error building GCE/GKE provider: %v", err) }