Only find running aws hosts by nodename

When finding instance by node name in AWS, only retrieve running
instances.  Otherwise terminated, old nodes can show up with the same
tag when rebuilding nodes in the cluster.

Another improvement made is to filter instances by the node names
provided, rather than selecting all instances and filtering in code.

Authors: @jsravn, @chbatey, @balooo
This commit is contained in:
Chris Batey, James Ravn and Yoseph Samuel 2016-02-23 14:44:03 +00:00
parent 7f1b699880
commit 087ff78cf9
2 changed files with 137 additions and 64 deletions

View File

@ -772,7 +772,7 @@ func (aws *AWSCloud) ExternalID(name string) (string, error) {
if err != nil {
return "", err
}
if instance == nil || !isAlive(instance) {
if instance == nil {
return "", cloudprovider.InstanceNotFound
}
return orEmpty(instance.InstanceId), nil
@ -817,28 +817,9 @@ func (aws *AWSCloud) InstanceType(name string) (string, error) {
}
}
// Check if the instance is alive (running or pending)
// We typically ignore instances that are not alive
func isAlive(instance *ec2.Instance) bool {
if instance.State == nil {
glog.Warning("Instance state was unexpectedly nil: ", instance)
return false
}
stateName := orEmpty(instance.State.Name)
switch stateName {
case "shutting-down", "terminated", "stopping", "stopped":
return false
case "pending", "running":
return true
default:
glog.Errorf("Unknown EC2 instance state: %s", stateName)
return false
}
}
// Return a list of instances matching regex string.
func (s *AWSCloud) getInstancesByRegex(regex string) ([]string, error) {
filters := []*ec2.Filter{}
filters := []*ec2.Filter{newEc2Filter("instance-state-name", "running")}
filters = s.addFilters(filters)
request := &ec2.DescribeInstancesInput{
Filters: filters,
@ -864,11 +845,6 @@ func (s *AWSCloud) getInstancesByRegex(regex string) ([]string, error) {
matchingInstances := []string{}
for _, instance := range instances {
// TODO: Push filtering down into EC2 API filter?
if !isAlive(instance) {
continue
}
// Only return fully-ready instances when listing instances
// (vs a query by name, where we will return it if we find it)
if orEmpty(instance.State.Name) == "pending" {
@ -2365,54 +2341,37 @@ func (a *AWSCloud) getInstancesByIDs(instanceIDs []*string) (map[string]*ec2.Ins
}
// Fetches instances by node names; returns an error if any cannot be found.
// This is currently implemented by fetching all the instances, because this is currently called for all nodes (i.e. the majority)
// In practice, the breakeven vs looping through and calling getInstanceByNodeName is probably around N=2.
// This is implemented with a multi value filter on the node names, fetching the desired instances with a single query.
func (a *AWSCloud) getInstancesByNodeNames(nodeNames []string) ([]*ec2.Instance, error) {
allInstances, err := a.getAllInstances()
if err != nil {
return nil, err
names := aws.StringSlice(nodeNames)
nodeNameFilter := &ec2.Filter{
Name: aws.String("private-dns-name"),
Values: names,
}
nodeNamesMap := make(map[string]int, len(nodeNames))
for i, nodeName := range nodeNames {
nodeNamesMap[nodeName] = i
filters := []*ec2.Filter{
nodeNameFilter,
newEc2Filter("instance-state-name", "running"),
}
instances := make([]*ec2.Instance, len(nodeNames))
for _, instance := range allInstances {
nodeName := aws.StringValue(instance.PrivateDnsName)
if nodeName == "" {
if isAlive(instance) {
glog.V(2).Infof("ignoring ec2 instance with no PrivateDnsName: %q", aws.StringValue(instance.InstanceId))
}
continue
}
i, found := nodeNamesMap[nodeName]
if !found {
continue
}
instances[i] = instance
}
for i, instance := range instances {
if instance == nil {
nodeName := nodeNames[i]
return nil, fmt.Errorf("unable to find instance %q", nodeName)
}
}
return instances, nil
}
// Returns all instances that are tagged as being in this cluster.
func (a *AWSCloud) getAllInstances() ([]*ec2.Instance, error) {
filters := []*ec2.Filter{}
filters = a.addFilters(filters)
request := &ec2.DescribeInstancesInput{
Filters: filters,
}
return a.ec2.DescribeInstances(request)
instances, err := a.ec2.DescribeInstances(request)
if err != nil {
glog.V(2).Infof("Failed to describe instances %v", nodeNames)
return nil, err
}
if len(instances) == 0 {
glog.V(3).Infof("Failed to find any instances %v", nodeNames)
return nil, nil
}
return instances, nil
}
// Returns the instance with the specified node name
@ -2420,6 +2379,7 @@ func (a *AWSCloud) getAllInstances() ([]*ec2.Instance, error) {
func (a *AWSCloud) findInstanceByNodeName(nodeName string) (*ec2.Instance, error) {
filters := []*ec2.Filter{
newEc2Filter("private-dns-name", nodeName),
newEc2Filter("instance-state-name", "running"),
}
filters = a.addFilters(filters)
request := &ec2.DescribeInstancesInput{

View File

@ -275,6 +275,20 @@ func instanceMatchesFilter(instance *ec2.Instance, filter *ec2.Filter) bool {
}
return contains(filter.Values, *instance.PrivateDnsName)
}
if name == "instance-state-name" {
return contains(filter.Values, *instance.State.Name)
}
if name == "tag:"+TagNameKubernetesCluster {
for _, tag := range instance.Tags {
if *tag.Key == TagNameKubernetesCluster {
return contains(filter.Values, *tag.Value)
}
}
return false
}
panic("Unknown filter name: " + name)
}
@ -937,3 +951,102 @@ func TestIpPermissionExistsHandlesMultipleGroupIdsWithUserIds(t *testing.T) {
t.Errorf("Should have not been considered equal since first is not in the second array of groups")
}
}
func TestFindInstanceByNodeNameExcludesTerminatedInstances(t *testing.T) {
awsServices := NewFakeAWSServices()
nodeName := "my-dns.internal"
var tag ec2.Tag
tag.Key = aws.String(TagNameKubernetesCluster)
tag.Value = aws.String(TestClusterId)
tags := []*ec2.Tag{&tag}
var runningInstance ec2.Instance
runningInstance.InstanceId = aws.String("i-running")
runningInstance.PrivateDnsName = aws.String(nodeName)
runningInstance.State = &ec2.InstanceState{Code: aws.Int64(16), Name: aws.String("running")}
runningInstance.Tags = tags
var terminatedInstance ec2.Instance
terminatedInstance.InstanceId = aws.String("i-terminated")
terminatedInstance.PrivateDnsName = aws.String(nodeName)
terminatedInstance.State = &ec2.InstanceState{Code: aws.Int64(48), Name: aws.String("terminated")}
terminatedInstance.Tags = tags
instances := []*ec2.Instance{&terminatedInstance, &runningInstance}
awsServices.instances = append(awsServices.instances, instances...)
c, err := newAWSCloud(strings.NewReader("[global]"), awsServices)
if err != nil {
t.Errorf("Error building aws cloud: %v", err)
return
}
instance, err := c.findInstanceByNodeName(nodeName)
if err != nil {
t.Errorf("Failed to find instance: %v", err)
return
}
if *instance.InstanceId != "i-running" {
t.Errorf("Expected running instance but got %v", *instance.InstanceId)
}
}
func TestFindInstancesByNodeName(t *testing.T) {
awsServices := NewFakeAWSServices()
nodeNameOne := "my-dns.internal"
nodeNameTwo := "my-dns-two.internal"
var tag ec2.Tag
tag.Key = aws.String(TagNameKubernetesCluster)
tag.Value = aws.String(TestClusterId)
tags := []*ec2.Tag{&tag}
var runningInstance ec2.Instance
runningInstance.InstanceId = aws.String("i-running")
runningInstance.PrivateDnsName = aws.String(nodeNameOne)
runningInstance.State = &ec2.InstanceState{Code: aws.Int64(16), Name: aws.String("running")}
runningInstance.Tags = tags
var secondInstance ec2.Instance
secondInstance.InstanceId = aws.String("i-running")
secondInstance.PrivateDnsName = aws.String(nodeNameTwo)
secondInstance.State = &ec2.InstanceState{Code: aws.Int64(48), Name: aws.String("running")}
secondInstance.Tags = tags
var terminatedInstance ec2.Instance
terminatedInstance.InstanceId = aws.String("i-terminated")
terminatedInstance.PrivateDnsName = aws.String(nodeNameOne)
terminatedInstance.State = &ec2.InstanceState{Code: aws.Int64(48), Name: aws.String("terminated")}
terminatedInstance.Tags = tags
instances := []*ec2.Instance{&secondInstance, &runningInstance, &terminatedInstance}
awsServices.instances = append(awsServices.instances, instances...)
c, err := newAWSCloud(strings.NewReader("[global]"), awsServices)
if err != nil {
t.Errorf("Error building aws cloud: %v", err)
return
}
nodeNames := []string{nodeNameOne}
returnedInstances, errr := c.getInstancesByNodeNames(nodeNames)
if errr != nil {
t.Errorf("Failed to find instance: %v", err)
return
}
if len(returnedInstances) != 1 {
t.Errorf("Expected a single isntance but found: %v", returnedInstances)
}
if *returnedInstances[0].PrivateDnsName != nodeNameOne {
t.Errorf("Expected node name %v but got %v", nodeNameOne, returnedInstances[0].PrivateDnsName)
}
}