From 9f44c72ba9d13fb95c60b2a1e72ab28c2666db98 Mon Sep 17 00:00:00 2001 From: Justin Santa Barbara Date: Wed, 20 Jan 2016 10:57:53 -0500 Subject: [PATCH] AWS: findInstancesByNodeNames should not make O(N) API calls findInstancesByNodeNames was a simple loop around findInstanceByNodeName, which made an EC2 API call for each call. We've had trouble with this sort of behaviour hitting EC2 rate limits on bigger clusters (e.g. #11979). Instead, change this method to fetch _all_ the tagged EC2 instances, and then loop through the local results. This is one API call (modulo paging). We are currently only using findInstancesByNodeNames for the load balancer, where we attach every node, so we were fetching all but one of the instances anyway. Issue #11979 --- pkg/cloudprovider/providers/aws/aws.go | 55 ++++++++++++++++++++------ 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/pkg/cloudprovider/providers/aws/aws.go b/pkg/cloudprovider/providers/aws/aws.go index d3de323a64b..cd9a18e1863 100644 --- a/pkg/cloudprovider/providers/aws/aws.go +++ b/pkg/cloudprovider/providers/aws/aws.go @@ -2146,22 +2146,55 @@ func (a *AWSCloud) getInstancesByIDs(instanceIDs []*string) (map[string]*ec2.Ins return instancesByID, nil } -// TODO: Make efficient +// Fetches instances by node names; returns an error if any cannot be found. +// This is currently implemented by fetching all the instances, because this is currently called for all nodes (i.e. the majority) +// In practice, the breakeven vs looping through and calling getInstanceByNodeName is probably around N=2. func (a *AWSCloud) getInstancesByNodeNames(nodeNames []string) ([]*ec2.Instance, error) { - instances := []*ec2.Instance{} - for _, nodeName := range nodeNames { - instance, err := a.getInstanceByNodeName(nodeName) - if err != nil { - return nil, err - } - if instance == nil { - return nil, fmt.Errorf("unable to find instance " + nodeName) - } - instances = append(instances, instance) + allInstances, err := a.getAllInstances() + if err != nil { + return nil, err } + + nodeNamesMap := make(map[string]int, len(nodeNames)) + for i, nodeName := range nodeNames { + nodeNamesMap[nodeName] = i + } + + instances := make([]*ec2.Instance, len(nodeNames)) + for _, instance := range allInstances { + nodeName := aws.StringValue(instance.PrivateDnsName) + if nodeName == "" { + glog.V(2).Infof("ignoring ec2 instance with no PrivateDnsName: %q", aws.StringValue(instance.InstanceId)) + continue + } + i, found := nodeNamesMap[nodeName] + if !found { + continue + } + instances[i] = instance + } + + for i, instance := range instances { + if instance == nil { + nodeName := nodeNames[i] + return nil, fmt.Errorf("unable to find instance %q", nodeName) + } + } + return instances, nil } +// Returns all instances that are tagged as being in this cluster. +func (a *AWSCloud) getAllInstances() ([]*ec2.Instance, error) { + filters := []*ec2.Filter{} + filters = a.addFilters(filters) + request := &ec2.DescribeInstancesInput{ + Filters: filters, + } + + return a.ec2.DescribeInstances(request) +} + // Returns the instance with the specified node name // Returns nil if it does not exist func (a *AWSCloud) findInstanceByNodeName(nodeName string) (*ec2.Instance, error) {