mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-30 06:54:01 +00:00
Only find running aws hosts by nodename
When finding instance by node name in AWS, only retrieve running instances. Otherwise terminated, old nodes can show up with the same tag when rebuilding nodes in the cluster. Another improvement made is to filter instances by the node names provided, rather than selecting all instances and filtering in code. Authors: @jsravn, @chbatey, @balooo
This commit is contained in:
parent
7f1b699880
commit
087ff78cf9
@ -772,7 +772,7 @@ func (aws *AWSCloud) ExternalID(name string) (string, error) {
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if instance == nil || !isAlive(instance) {
|
||||
if instance == nil {
|
||||
return "", cloudprovider.InstanceNotFound
|
||||
}
|
||||
return orEmpty(instance.InstanceId), nil
|
||||
@ -817,28 +817,9 @@ func (aws *AWSCloud) InstanceType(name string) (string, error) {
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the instance is alive (running or pending)
|
||||
// We typically ignore instances that are not alive
|
||||
func isAlive(instance *ec2.Instance) bool {
|
||||
if instance.State == nil {
|
||||
glog.Warning("Instance state was unexpectedly nil: ", instance)
|
||||
return false
|
||||
}
|
||||
stateName := orEmpty(instance.State.Name)
|
||||
switch stateName {
|
||||
case "shutting-down", "terminated", "stopping", "stopped":
|
||||
return false
|
||||
case "pending", "running":
|
||||
return true
|
||||
default:
|
||||
glog.Errorf("Unknown EC2 instance state: %s", stateName)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Return a list of instances matching regex string.
|
||||
func (s *AWSCloud) getInstancesByRegex(regex string) ([]string, error) {
|
||||
filters := []*ec2.Filter{}
|
||||
filters := []*ec2.Filter{newEc2Filter("instance-state-name", "running")}
|
||||
filters = s.addFilters(filters)
|
||||
request := &ec2.DescribeInstancesInput{
|
||||
Filters: filters,
|
||||
@ -864,11 +845,6 @@ func (s *AWSCloud) getInstancesByRegex(regex string) ([]string, error) {
|
||||
|
||||
matchingInstances := []string{}
|
||||
for _, instance := range instances {
|
||||
// TODO: Push filtering down into EC2 API filter?
|
||||
if !isAlive(instance) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Only return fully-ready instances when listing instances
|
||||
// (vs a query by name, where we will return it if we find it)
|
||||
if orEmpty(instance.State.Name) == "pending" {
|
||||
@ -2365,54 +2341,37 @@ func (a *AWSCloud) getInstancesByIDs(instanceIDs []*string) (map[string]*ec2.Ins
|
||||
}
|
||||
|
||||
// Fetches instances by node names; returns an error if any cannot be found.
|
||||
// This is currently implemented by fetching all the instances, because this is currently called for all nodes (i.e. the majority)
|
||||
// In practice, the breakeven vs looping through and calling getInstanceByNodeName is probably around N=2.
|
||||
// This is implemented with a multi value filter on the node names, fetching the desired instances with a single query.
|
||||
func (a *AWSCloud) getInstancesByNodeNames(nodeNames []string) ([]*ec2.Instance, error) {
|
||||
allInstances, err := a.getAllInstances()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
names := aws.StringSlice(nodeNames)
|
||||
|
||||
nodeNameFilter := &ec2.Filter{
|
||||
Name: aws.String("private-dns-name"),
|
||||
Values: names,
|
||||
}
|
||||
|
||||
nodeNamesMap := make(map[string]int, len(nodeNames))
|
||||
for i, nodeName := range nodeNames {
|
||||
nodeNamesMap[nodeName] = i
|
||||
filters := []*ec2.Filter{
|
||||
nodeNameFilter,
|
||||
newEc2Filter("instance-state-name", "running"),
|
||||
}
|
||||
|
||||
instances := make([]*ec2.Instance, len(nodeNames))
|
||||
for _, instance := range allInstances {
|
||||
nodeName := aws.StringValue(instance.PrivateDnsName)
|
||||
if nodeName == "" {
|
||||
if isAlive(instance) {
|
||||
glog.V(2).Infof("ignoring ec2 instance with no PrivateDnsName: %q", aws.StringValue(instance.InstanceId))
|
||||
}
|
||||
continue
|
||||
}
|
||||
i, found := nodeNamesMap[nodeName]
|
||||
if !found {
|
||||
continue
|
||||
}
|
||||
instances[i] = instance
|
||||
}
|
||||
|
||||
for i, instance := range instances {
|
||||
if instance == nil {
|
||||
nodeName := nodeNames[i]
|
||||
return nil, fmt.Errorf("unable to find instance %q", nodeName)
|
||||
}
|
||||
}
|
||||
|
||||
return instances, nil
|
||||
}
|
||||
|
||||
// Returns all instances that are tagged as being in this cluster.
|
||||
func (a *AWSCloud) getAllInstances() ([]*ec2.Instance, error) {
|
||||
filters := []*ec2.Filter{}
|
||||
filters = a.addFilters(filters)
|
||||
request := &ec2.DescribeInstancesInput{
|
||||
Filters: filters,
|
||||
}
|
||||
|
||||
return a.ec2.DescribeInstances(request)
|
||||
instances, err := a.ec2.DescribeInstances(request)
|
||||
if err != nil {
|
||||
glog.V(2).Infof("Failed to describe instances %v", nodeNames)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(instances) == 0 {
|
||||
glog.V(3).Infof("Failed to find any instances %v", nodeNames)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return instances, nil
|
||||
}
|
||||
|
||||
// Returns the instance with the specified node name
|
||||
@ -2420,6 +2379,7 @@ func (a *AWSCloud) getAllInstances() ([]*ec2.Instance, error) {
|
||||
func (a *AWSCloud) findInstanceByNodeName(nodeName string) (*ec2.Instance, error) {
|
||||
filters := []*ec2.Filter{
|
||||
newEc2Filter("private-dns-name", nodeName),
|
||||
newEc2Filter("instance-state-name", "running"),
|
||||
}
|
||||
filters = a.addFilters(filters)
|
||||
request := &ec2.DescribeInstancesInput{
|
||||
|
@ -275,6 +275,20 @@ func instanceMatchesFilter(instance *ec2.Instance, filter *ec2.Filter) bool {
|
||||
}
|
||||
return contains(filter.Values, *instance.PrivateDnsName)
|
||||
}
|
||||
|
||||
if name == "instance-state-name" {
|
||||
return contains(filter.Values, *instance.State.Name)
|
||||
}
|
||||
|
||||
if name == "tag:"+TagNameKubernetesCluster {
|
||||
for _, tag := range instance.Tags {
|
||||
if *tag.Key == TagNameKubernetesCluster {
|
||||
return contains(filter.Values, *tag.Value)
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
panic("Unknown filter name: " + name)
|
||||
}
|
||||
|
||||
@ -937,3 +951,102 @@ func TestIpPermissionExistsHandlesMultipleGroupIdsWithUserIds(t *testing.T) {
|
||||
t.Errorf("Should have not been considered equal since first is not in the second array of groups")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindInstanceByNodeNameExcludesTerminatedInstances(t *testing.T) {
|
||||
awsServices := NewFakeAWSServices()
|
||||
|
||||
nodeName := "my-dns.internal"
|
||||
|
||||
var tag ec2.Tag
|
||||
tag.Key = aws.String(TagNameKubernetesCluster)
|
||||
tag.Value = aws.String(TestClusterId)
|
||||
tags := []*ec2.Tag{&tag}
|
||||
|
||||
var runningInstance ec2.Instance
|
||||
runningInstance.InstanceId = aws.String("i-running")
|
||||
runningInstance.PrivateDnsName = aws.String(nodeName)
|
||||
runningInstance.State = &ec2.InstanceState{Code: aws.Int64(16), Name: aws.String("running")}
|
||||
runningInstance.Tags = tags
|
||||
|
||||
var terminatedInstance ec2.Instance
|
||||
terminatedInstance.InstanceId = aws.String("i-terminated")
|
||||
terminatedInstance.PrivateDnsName = aws.String(nodeName)
|
||||
terminatedInstance.State = &ec2.InstanceState{Code: aws.Int64(48), Name: aws.String("terminated")}
|
||||
terminatedInstance.Tags = tags
|
||||
|
||||
instances := []*ec2.Instance{&terminatedInstance, &runningInstance}
|
||||
awsServices.instances = append(awsServices.instances, instances...)
|
||||
|
||||
c, err := newAWSCloud(strings.NewReader("[global]"), awsServices)
|
||||
if err != nil {
|
||||
t.Errorf("Error building aws cloud: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
instance, err := c.findInstanceByNodeName(nodeName)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("Failed to find instance: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if *instance.InstanceId != "i-running" {
|
||||
t.Errorf("Expected running instance but got %v", *instance.InstanceId)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindInstancesByNodeName(t *testing.T) {
|
||||
awsServices := NewFakeAWSServices()
|
||||
|
||||
nodeNameOne := "my-dns.internal"
|
||||
nodeNameTwo := "my-dns-two.internal"
|
||||
|
||||
var tag ec2.Tag
|
||||
tag.Key = aws.String(TagNameKubernetesCluster)
|
||||
tag.Value = aws.String(TestClusterId)
|
||||
tags := []*ec2.Tag{&tag}
|
||||
|
||||
var runningInstance ec2.Instance
|
||||
runningInstance.InstanceId = aws.String("i-running")
|
||||
runningInstance.PrivateDnsName = aws.String(nodeNameOne)
|
||||
runningInstance.State = &ec2.InstanceState{Code: aws.Int64(16), Name: aws.String("running")}
|
||||
runningInstance.Tags = tags
|
||||
|
||||
var secondInstance ec2.Instance
|
||||
|
||||
secondInstance.InstanceId = aws.String("i-running")
|
||||
secondInstance.PrivateDnsName = aws.String(nodeNameTwo)
|
||||
secondInstance.State = &ec2.InstanceState{Code: aws.Int64(48), Name: aws.String("running")}
|
||||
secondInstance.Tags = tags
|
||||
|
||||
var terminatedInstance ec2.Instance
|
||||
terminatedInstance.InstanceId = aws.String("i-terminated")
|
||||
terminatedInstance.PrivateDnsName = aws.String(nodeNameOne)
|
||||
terminatedInstance.State = &ec2.InstanceState{Code: aws.Int64(48), Name: aws.String("terminated")}
|
||||
terminatedInstance.Tags = tags
|
||||
|
||||
instances := []*ec2.Instance{&secondInstance, &runningInstance, &terminatedInstance}
|
||||
awsServices.instances = append(awsServices.instances, instances...)
|
||||
|
||||
c, err := newAWSCloud(strings.NewReader("[global]"), awsServices)
|
||||
if err != nil {
|
||||
t.Errorf("Error building aws cloud: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
nodeNames := []string{nodeNameOne}
|
||||
returnedInstances, errr := c.getInstancesByNodeNames(nodeNames)
|
||||
|
||||
if errr != nil {
|
||||
t.Errorf("Failed to find instance: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if len(returnedInstances) != 1 {
|
||||
t.Errorf("Expected a single isntance but found: %v", returnedInstances)
|
||||
}
|
||||
|
||||
if *returnedInstances[0].PrivateDnsName != nodeNameOne {
|
||||
t.Errorf("Expected node name %v but got %v", nodeNameOne, returnedInstances[0].PrivateDnsName)
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user