AWS: Recover if tags missing on security group

In the AWS API (generally) we tag things we create, and then we filter
to find them.  However, creation & tagging are typically two separate
calls.  So there is a chance that we will create an object, but fail to
tag it.

We fix this (done here in the case of security groups, but we can do
this more generally) by retrieving the resource without a tag filter.
If the retrieved resource has the correct tags, great.  If it has the
tags for another cluster, that's a problem, and we raise an error.  If
it has no tags at all, we add the tags.

This only works where the resource is uniquely named (or we can
otherwise retrieve it uniquely).  For security groups, the SG name comes
from the service UUID, so that's unique.

Fixes #11324
This commit is contained in:
Justin Santa Barbara 2016-02-20 14:30:35 -05:00
parent 6fef6bc977
commit 22d719018a

View File

@ -1367,17 +1367,7 @@ func (s *AWSCloud) CreateDisk(volumeOptions *VolumeOptions) (string, error) {
// apply tags
if volumeOptions.Tags != nil {
tags := []*ec2.Tag{}
for k, v := range *volumeOptions.Tags {
tag := &ec2.Tag{}
tag.Key = aws.String(k)
tag.Value = aws.String(v)
tags = append(tags, tag)
}
tagRequest := &ec2.CreateTagsInput{}
tagRequest.Resources = []*string{&awsID}
tagRequest.Tags = tags
if _, err := s.createTags(tagRequest); err != nil {
if err := s.createTags(awsID, *volumeOptions.Tags); err != nil {
// delete the volume and hope it succeeds
_, delerr := s.DeleteDisk(volumeName)
if delerr != nil {
@ -1682,6 +1672,36 @@ func (s *AWSCloud) removeSecurityGroupIngress(securityGroupId string, removePerm
return true, nil
}
// Ensure that a resource has the correct tags
// If it has no tags, we assume that this was a problem caused by an error in between creation and tagging,
// and we add the tags. If it has a different cluster's tags, that is an error.
func (s *AWSCloud) ensureClusterTags(resourceID string, tags []*ec2.Tag) error {
actualTags := make(map[string]string)
for _, tag := range tags {
actualTags[aws.StringValue(tag.Key)] = aws.StringValue(tag.Value)
}
addTags := make(map[string]string)
for k, expected := range s.filterTags {
actual := actualTags[k]
if actual == expected {
continue
}
if actual == "" {
glog.Warningf("Resource %q was missing expected cluster tag %q. Will add (with value %q)", resourceID, k, expected)
addTags[k] = expected
} else {
return fmt.Errorf("resource %q has tag belonging to another cluster: %q=%q (expected %q)", resourceID, k, actual, expected)
}
}
if err := s.createTags(resourceID, addTags); err != nil {
return fmt.Errorf("error adding missing tags to resource %q: %v", resourceID, err)
}
return nil
}
// Makes sure the security group exists
// Returns the security group id or error
func (s *AWSCloud) ensureSecurityGroup(name string, description string, vpcID string) (string, error) {
@ -1695,7 +1715,12 @@ func (s *AWSCloud) ensureSecurityGroup(name string, description string, vpcID st
newEc2Filter("group-name", name),
newEc2Filter("vpc-id", vpcID),
}
request.Filters = s.addFilters(filters)
// Note that we do _not_ add our tag filters; group-name + vpc-id is the EC2 primary key.
// However, we do check that it matches our tags.
// If it doesn't have any tags, we tag it; this is how we recover if we failed to tag before.
// If it has a different cluster's tags, that is an error.
// This shouldn't happen because name is expected to be globally unique (UUID derived)
request.Filters = filters
securityGroups, err := s.ec2.DescribeSecurityGroups(request)
if err != nil {
@ -1706,7 +1731,12 @@ func (s *AWSCloud) ensureSecurityGroup(name string, description string, vpcID st
if len(securityGroups) > 1 {
glog.Warning("Found multiple security groups with name:", name)
}
return orEmpty(securityGroups[0].GroupId), nil
err := s.ensureClusterTags(aws.StringValue(securityGroups[0].GroupId), securityGroups[0].Tags)
if err != nil {
return "", err
}
return aws.StringValue(securityGroups[0].GroupId), nil
}
createRequest := &ec2.CreateSecurityGroupInput{}
@ -1738,22 +1768,13 @@ func (s *AWSCloud) ensureSecurityGroup(name string, description string, vpcID st
return "", fmt.Errorf("created security group, but id was not returned: %s", name)
}
tags := []*ec2.Tag{}
for k, v := range s.filterTags {
tag := &ec2.Tag{}
tag.Key = aws.String(k)
tag.Value = aws.String(v)
tags = append(tags, tag)
}
if len(tags) > 0 {
tagRequest := &ec2.CreateTagsInput{}
tagRequest.Resources = []*string{&groupID}
tagRequest.Tags = tags
if _, err := s.createTags(tagRequest); err != nil {
// Not clear how to recover fully from this; we're OK because we don't match on tags, but that is a little odd
return "", fmt.Errorf("error tagging security group: %v", err)
}
err := s.createTags(groupID, s.filterTags)
if err != nil {
// If we retry, ensureClusterTags will recover from this - it
// will add the missing tags. We could delete the security
// group here, but that doesn't feel like the right thing, as
// the caller is likely to retry the create
return "", fmt.Errorf("error tagging security group: %v", err)
}
return groupID, nil
}
@ -1761,15 +1782,32 @@ func (s *AWSCloud) ensureSecurityGroup(name string, description string, vpcID st
// createTags calls EC2 CreateTags, but adds retry-on-failure logic
// We retry mainly because if we create an object, we cannot tag it until it is "fully created" (eventual consistency)
// The error code varies though (depending on what we are tagging), so we simply retry on all errors
func (s *AWSCloud) createTags(request *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) {
func (s *AWSCloud) createTags(resourceID string, tags map[string]string) error {
if tags == nil || len(tags) == 0 {
return nil
}
var awsTags []*ec2.Tag
for k, v := range tags {
tag := &ec2.Tag{
Key: aws.String(k),
Value: aws.String(v),
}
awsTags = append(awsTags, tag)
}
request := &ec2.CreateTagsInput{}
request.Resources = []*string{&resourceID}
request.Tags = awsTags
// TODO: We really should do exponential backoff here
attempt := 0
maxAttempts := 60
for {
response, err := s.ec2.CreateTags(request)
_, err := s.ec2.CreateTags(request)
if err == nil {
return response, err
return nil
}
// We could check that the error is retryable, but the error code changes based on what we are tagging
@ -1777,7 +1815,7 @@ func (s *AWSCloud) createTags(request *ec2.CreateTagsInput) (*ec2.CreateTagsOutp
attempt++
if attempt > maxAttempts {
glog.Warningf("Failed to create tags (too many attempts): %v", err)
return response, err
return err
}
glog.V(2).Infof("Failed to create tags; will retry. Error was %v", err)
time.Sleep(1 * time.Second)