Merge pull request #66835 from sjenning/aws-exist-check

Automatic merge from submit-queue (batch tested with PRs 67571, 67284, 66835, 68096, 68152). If you want to cherry-pick this change to another branch, please follow the instructions here: https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md.

cloudprovider: aws: return true on existence check for stopped instances

xref https://bugzilla.redhat.com/show_bug.cgi?id=1559271
xref https://github.com/openshift/origin/issues/19899

background https://github.com/kubernetes/kubernetes/pull/45986#issuecomment-386332055

Basically our customers are hitting this issue where the Node resource is deleted when the AWS instances stop (not terminate).  If the instances restart, the Nodes lose any labeling/taints.

Openstack cloudprovider already made this change https://github.com/kubernetes/kubernetes/pull/59931

fixes https://github.com/kubernetes/kubernetes/issues/45118 for AWS

**Reviewer note**: valid AWS instance states are `pending | running | shutting-down | terminated | stopping | stopped`.  There might be a case for returning `false` for instances in `pending` and/or `terminated` state.  Discuss!

`InstanceID()` changes from https://github.com/kubernetes/kubernetes/pull/45986 credit @rrati 

@derekwaynecarr @smarterclayton @liggitt @justinsb @jsafrane @countspongebob
This commit is contained in:
Kubernetes Submit Queue 2018-08-31 20:41:40 -07:00 committed by GitHub
commit ba781540e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 58 additions and 26 deletions

View File

@ -1359,6 +1359,12 @@ func (c *Cloud) InstanceExistsByProviderID(ctx context.Context, providerID strin
return false, fmt.Errorf("multiple instances found for instance: %s", instanceID)
}
state := instances[0].State.Name
if *state == ec2.InstanceStateNameTerminated {
glog.Warningf("the instance %s is terminated", instanceID)
return false, nil
}
return true, nil
}
@ -4314,13 +4320,22 @@ func mapInstanceToNodeName(i *ec2.Instance) types.NodeName {
return types.NodeName(aws.StringValue(i.PrivateDnsName))
}
var aliveFilter = []string{
ec2.InstanceStateNamePending,
ec2.InstanceStateNameRunning,
ec2.InstanceStateNameShuttingDown,
ec2.InstanceStateNameStopping,
ec2.InstanceStateNameStopped,
}
// Returns the instance with the specified node name
// Returns nil if it does not exist
func (c *Cloud) findInstanceByNodeName(nodeName types.NodeName) (*ec2.Instance, error) {
privateDNSName := mapNodeNameToPrivateDNSName(nodeName)
filters := []*ec2.Filter{
newEc2Filter("private-dns-name", privateDNSName),
newEc2Filter("instance-state-name", "running"),
// exclude instances in "terminated" state
newEc2Filter("instance-state-name", aliveFilter...),
}
instances, err := c.describeInstances(filters)

View File

@ -798,6 +798,18 @@ func TestIpPermissionExistsHandlesMultipleGroupIdsWithUserIds(t *testing.T) {
}
func TestFindInstanceByNodeNameExcludesTerminatedInstances(t *testing.T) {
awsStates := []struct {
id int64
state string
expected bool
}{
{0, ec2.InstanceStateNamePending, true},
{16, ec2.InstanceStateNameRunning, true},
{32, ec2.InstanceStateNameShuttingDown, true},
{48, ec2.InstanceStateNameTerminated, false},
{64, ec2.InstanceStateNameStopping, true},
{80, ec2.InstanceStateNameStopped, true},
}
awsServices := newMockedFakeAWSServices(TestClusterId)
nodeName := types.NodeName("my-dns.internal")
@ -807,36 +819,41 @@ func TestFindInstanceByNodeNameExcludesTerminatedInstances(t *testing.T) {
tag.Value = aws.String(TestClusterId)
tags := []*ec2.Tag{&tag}
var runningInstance ec2.Instance
runningInstance.InstanceId = aws.String("i-running")
runningInstance.PrivateDnsName = aws.String(string(nodeName))
runningInstance.State = &ec2.InstanceState{Code: aws.Int64(16), Name: aws.String("running")}
runningInstance.Tags = tags
var testInstance ec2.Instance
testInstance.PrivateDnsName = aws.String(string(nodeName))
testInstance.Tags = tags
var terminatedInstance ec2.Instance
terminatedInstance.InstanceId = aws.String("i-terminated")
terminatedInstance.PrivateDnsName = aws.String(string(nodeName))
terminatedInstance.State = &ec2.InstanceState{Code: aws.Int64(48), Name: aws.String("terminated")}
terminatedInstance.Tags = tags
awsDefaultInstances := awsServices.instances
for _, awsState := range awsStates {
id := "i-" + awsState.state
testInstance.InstanceId = aws.String(id)
testInstance.State = &ec2.InstanceState{Code: aws.Int64(awsState.id), Name: aws.String(awsState.state)}
instances := []*ec2.Instance{&terminatedInstance, &runningInstance}
awsServices.instances = append(awsServices.instances, instances...)
awsServices.instances = append(awsDefaultInstances, &testInstance)
c, err := newAWSCloud(CloudConfig{}, awsServices)
if err != nil {
t.Errorf("Error building aws cloud: %v", err)
return
}
c, err := newAWSCloud(CloudConfig{}, awsServices)
if err != nil {
t.Errorf("Error building aws cloud: %v", err)
return
}
instance, err := c.findInstanceByNodeName(nodeName)
resultInstance, err := c.findInstanceByNodeName(nodeName)
if err != nil {
t.Errorf("Failed to find instance: %v", err)
return
}
if *instance.InstanceId != "i-running" {
t.Errorf("Expected running instance but got %v", *instance.InstanceId)
if awsState.expected {
if err != nil || resultInstance == nil {
t.Errorf("Expected to find instance %v", *testInstance.InstanceId)
return
}
if *resultInstance.InstanceId != *testInstance.InstanceId {
t.Errorf("Wrong instance returned by findInstanceByNodeName() expected: %v, actual: %v", *testInstance.InstanceId, *resultInstance.InstanceId)
return
}
} else {
if err == nil && resultInstance != nil {
t.Errorf("Did not expect to find instance %v", *resultInstance.InstanceId)
return
}
}
}
}