mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-22 11:21:47 +00:00
Merge pull request #66835 from sjenning/aws-exist-check
Automatic merge from submit-queue (batch tested with PRs 67571, 67284, 66835, 68096, 68152). If you want to cherry-pick this change to another branch, please follow the instructions here: https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md. cloudprovider: aws: return true on existence check for stopped instances xref https://bugzilla.redhat.com/show_bug.cgi?id=1559271 xref https://github.com/openshift/origin/issues/19899 background https://github.com/kubernetes/kubernetes/pull/45986#issuecomment-386332055 Basically our customers are hitting this issue where the Node resource is deleted when the AWS instances stop (not terminate). If the instances restart, the Nodes lose any labeling/taints. Openstack cloudprovider already made this change https://github.com/kubernetes/kubernetes/pull/59931 fixes https://github.com/kubernetes/kubernetes/issues/45118 for AWS **Reviewer note**: valid AWS instance states are `pending | running | shutting-down | terminated | stopping | stopped`. There might be a case for returning `false` for instances in `pending` and/or `terminated` state. Discuss! `InstanceID()` changes from https://github.com/kubernetes/kubernetes/pull/45986 credit @rrati @derekwaynecarr @smarterclayton @liggitt @justinsb @jsafrane @countspongebob
This commit is contained in:
commit
ba781540e0
@ -1359,6 +1359,12 @@ func (c *Cloud) InstanceExistsByProviderID(ctx context.Context, providerID strin
|
||||
return false, fmt.Errorf("multiple instances found for instance: %s", instanceID)
|
||||
}
|
||||
|
||||
state := instances[0].State.Name
|
||||
if *state == ec2.InstanceStateNameTerminated {
|
||||
glog.Warningf("the instance %s is terminated", instanceID)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
@ -4314,13 +4320,22 @@ func mapInstanceToNodeName(i *ec2.Instance) types.NodeName {
|
||||
return types.NodeName(aws.StringValue(i.PrivateDnsName))
|
||||
}
|
||||
|
||||
var aliveFilter = []string{
|
||||
ec2.InstanceStateNamePending,
|
||||
ec2.InstanceStateNameRunning,
|
||||
ec2.InstanceStateNameShuttingDown,
|
||||
ec2.InstanceStateNameStopping,
|
||||
ec2.InstanceStateNameStopped,
|
||||
}
|
||||
|
||||
// Returns the instance with the specified node name
|
||||
// Returns nil if it does not exist
|
||||
func (c *Cloud) findInstanceByNodeName(nodeName types.NodeName) (*ec2.Instance, error) {
|
||||
privateDNSName := mapNodeNameToPrivateDNSName(nodeName)
|
||||
filters := []*ec2.Filter{
|
||||
newEc2Filter("private-dns-name", privateDNSName),
|
||||
newEc2Filter("instance-state-name", "running"),
|
||||
// exclude instances in "terminated" state
|
||||
newEc2Filter("instance-state-name", aliveFilter...),
|
||||
}
|
||||
|
||||
instances, err := c.describeInstances(filters)
|
||||
|
@ -798,6 +798,18 @@ func TestIpPermissionExistsHandlesMultipleGroupIdsWithUserIds(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestFindInstanceByNodeNameExcludesTerminatedInstances(t *testing.T) {
|
||||
awsStates := []struct {
|
||||
id int64
|
||||
state string
|
||||
expected bool
|
||||
}{
|
||||
{0, ec2.InstanceStateNamePending, true},
|
||||
{16, ec2.InstanceStateNameRunning, true},
|
||||
{32, ec2.InstanceStateNameShuttingDown, true},
|
||||
{48, ec2.InstanceStateNameTerminated, false},
|
||||
{64, ec2.InstanceStateNameStopping, true},
|
||||
{80, ec2.InstanceStateNameStopped, true},
|
||||
}
|
||||
awsServices := newMockedFakeAWSServices(TestClusterId)
|
||||
|
||||
nodeName := types.NodeName("my-dns.internal")
|
||||
@ -807,36 +819,41 @@ func TestFindInstanceByNodeNameExcludesTerminatedInstances(t *testing.T) {
|
||||
tag.Value = aws.String(TestClusterId)
|
||||
tags := []*ec2.Tag{&tag}
|
||||
|
||||
var runningInstance ec2.Instance
|
||||
runningInstance.InstanceId = aws.String("i-running")
|
||||
runningInstance.PrivateDnsName = aws.String(string(nodeName))
|
||||
runningInstance.State = &ec2.InstanceState{Code: aws.Int64(16), Name: aws.String("running")}
|
||||
runningInstance.Tags = tags
|
||||
var testInstance ec2.Instance
|
||||
testInstance.PrivateDnsName = aws.String(string(nodeName))
|
||||
testInstance.Tags = tags
|
||||
|
||||
var terminatedInstance ec2.Instance
|
||||
terminatedInstance.InstanceId = aws.String("i-terminated")
|
||||
terminatedInstance.PrivateDnsName = aws.String(string(nodeName))
|
||||
terminatedInstance.State = &ec2.InstanceState{Code: aws.Int64(48), Name: aws.String("terminated")}
|
||||
terminatedInstance.Tags = tags
|
||||
awsDefaultInstances := awsServices.instances
|
||||
for _, awsState := range awsStates {
|
||||
id := "i-" + awsState.state
|
||||
testInstance.InstanceId = aws.String(id)
|
||||
testInstance.State = &ec2.InstanceState{Code: aws.Int64(awsState.id), Name: aws.String(awsState.state)}
|
||||
|
||||
instances := []*ec2.Instance{&terminatedInstance, &runningInstance}
|
||||
awsServices.instances = append(awsServices.instances, instances...)
|
||||
awsServices.instances = append(awsDefaultInstances, &testInstance)
|
||||
|
||||
c, err := newAWSCloud(CloudConfig{}, awsServices)
|
||||
if err != nil {
|
||||
t.Errorf("Error building aws cloud: %v", err)
|
||||
return
|
||||
}
|
||||
c, err := newAWSCloud(CloudConfig{}, awsServices)
|
||||
if err != nil {
|
||||
t.Errorf("Error building aws cloud: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
instance, err := c.findInstanceByNodeName(nodeName)
|
||||
resultInstance, err := c.findInstanceByNodeName(nodeName)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("Failed to find instance: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if *instance.InstanceId != "i-running" {
|
||||
t.Errorf("Expected running instance but got %v", *instance.InstanceId)
|
||||
if awsState.expected {
|
||||
if err != nil || resultInstance == nil {
|
||||
t.Errorf("Expected to find instance %v", *testInstance.InstanceId)
|
||||
return
|
||||
}
|
||||
if *resultInstance.InstanceId != *testInstance.InstanceId {
|
||||
t.Errorf("Wrong instance returned by findInstanceByNodeName() expected: %v, actual: %v", *testInstance.InstanceId, *resultInstance.InstanceId)
|
||||
return
|
||||
}
|
||||
} else {
|
||||
if err == nil && resultInstance != nil {
|
||||
t.Errorf("Did not expect to find instance %v", *resultInstance.InstanceId)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user