mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-26 21:17:23 +00:00
AWS: Fix race in security-group read/create
We need to find the ID for a named security group, or create a new one. We do this by listing the security groups, and then doing a create if we cannot find one. This is racy, though: it would race against another thread even if the AWS API were consistent, and more generally it races because the AWS API is only eventually consistent. We wrap it in a retry loop.
This commit is contained in:
parent
d7bace23ff
commit
092d407a48
@ -50,6 +50,12 @@ const ProviderName = "aws"
|
|||||||
// The tag name we use to differentiate multiple logically independent clusters running in the same AZ
|
// The tag name we use to differentiate multiple logically independent clusters running in the same AZ
|
||||||
const TagNameKubernetesCluster = "KubernetesCluster"
|
const TagNameKubernetesCluster = "KubernetesCluster"
|
||||||
|
|
||||||
|
// We sometimes read to see if something exists; then try to create it if we didn't find it
|
||||||
|
// This can fail once in a consistent system if done in parallel
|
||||||
|
// In an eventually consistent system, it could fail unboundedly
|
||||||
|
// MaxReadThenCreateRetries sets the maximum number of attempts we will make
|
||||||
|
const MaxReadThenCreateRetries = 30
|
||||||
|
|
||||||
// Abstraction over AWS, to allow mocking/other implementations
|
// Abstraction over AWS, to allow mocking/other implementations
|
||||||
type AWSServices interface {
|
type AWSServices interface {
|
||||||
Compute(region string) (EC2, error)
|
Compute(region string) (EC2, error)
|
||||||
@ -1656,37 +1662,54 @@ func (s *AWSCloud) removeSecurityGroupIngress(securityGroupId string, removePerm
|
|||||||
// Makes sure the security group exists
|
// Makes sure the security group exists
|
||||||
// Returns the security group id or error
|
// Returns the security group id or error
|
||||||
func (s *AWSCloud) ensureSecurityGroup(name string, description string, vpcID string) (string, error) {
|
func (s *AWSCloud) ensureSecurityGroup(name string, description string, vpcID string) (string, error) {
|
||||||
request := &ec2.DescribeSecurityGroupsInput{}
|
groupID := ""
|
||||||
filters := []*ec2.Filter{
|
attempt := 0
|
||||||
newEc2Filter("group-name", name),
|
for {
|
||||||
newEc2Filter("vpc-id", vpcID),
|
attempt++
|
||||||
}
|
|
||||||
request.Filters = s.addFilters(filters)
|
|
||||||
|
|
||||||
securityGroups, err := s.ec2.DescribeSecurityGroups(request)
|
request := &ec2.DescribeSecurityGroupsInput{}
|
||||||
if err != nil {
|
filters := []*ec2.Filter{
|
||||||
return "", err
|
newEc2Filter("group-name", name),
|
||||||
}
|
newEc2Filter("vpc-id", vpcID),
|
||||||
|
}
|
||||||
if len(securityGroups) >= 1 {
|
request.Filters = s.addFilters(filters)
|
||||||
if len(securityGroups) > 1 {
|
|
||||||
glog.Warning("Found multiple security groups with name:", name)
|
securityGroups, err := s.ec2.DescribeSecurityGroups(request)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(securityGroups) >= 1 {
|
||||||
|
if len(securityGroups) > 1 {
|
||||||
|
glog.Warning("Found multiple security groups with name:", name)
|
||||||
|
}
|
||||||
|
return orEmpty(securityGroups[0].GroupID), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
createRequest := &ec2.CreateSecurityGroupInput{}
|
||||||
|
createRequest.VPCID = &vpcID
|
||||||
|
createRequest.GroupName = &name
|
||||||
|
createRequest.Description = &description
|
||||||
|
|
||||||
|
createResponse, err := s.ec2.CreateSecurityGroup(createRequest)
|
||||||
|
if err != nil {
|
||||||
|
ignore := false
|
||||||
|
if awsError, ok := err.(awserr.Error); ok {
|
||||||
|
if awsError.Code() == "InvalidGroup.Duplicate" && attempt < MaxReadThenCreateRetries {
|
||||||
|
glog.V(2).Infof("Got InvalidGroup.Duplicate while creating security group (race?); will retry")
|
||||||
|
ignore = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !ignore {
|
||||||
|
glog.Error("error creating security group: ", err)
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
time.Sleep(1 * time.Second)
|
||||||
|
} else {
|
||||||
|
groupID = orEmpty(createResponse.GroupID)
|
||||||
|
break
|
||||||
}
|
}
|
||||||
return orEmpty(securityGroups[0].GroupID), nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
createRequest := &ec2.CreateSecurityGroupInput{}
|
|
||||||
createRequest.VPCID = &vpcID
|
|
||||||
createRequest.GroupName = &name
|
|
||||||
createRequest.Description = &description
|
|
||||||
|
|
||||||
createResponse, err := s.ec2.CreateSecurityGroup(createRequest)
|
|
||||||
if err != nil {
|
|
||||||
glog.Error("error creating security group: ", err)
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
groupID := orEmpty(createResponse.GroupID)
|
|
||||||
if groupID == "" {
|
if groupID == "" {
|
||||||
return "", fmt.Errorf("created security group, but id was not returned: %s", name)
|
return "", fmt.Errorf("created security group, but id was not returned: %s", name)
|
||||||
}
|
}
|
||||||
@ -1702,7 +1725,7 @@ func (s *AWSCloud) ensureSecurityGroup(name string, description string, vpcID st
|
|||||||
tagRequest := &ec2.CreateTagsInput{}
|
tagRequest := &ec2.CreateTagsInput{}
|
||||||
tagRequest.Resources = []*string{&groupID}
|
tagRequest.Resources = []*string{&groupID}
|
||||||
tagRequest.Tags = tags
|
tagRequest.Tags = tags
|
||||||
_, err = s.createTags(tagRequest)
|
_, err := s.createTags(tagRequest)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Not clear how to recover fully from this; we're OK because we don't match on tags, but that is a little odd
|
// Not clear how to recover fully from this; we're OK because we don't match on tags, but that is a little odd
|
||||||
return "", fmt.Errorf("error tagging security group: %v", err)
|
return "", fmt.Errorf("error tagging security group: %v", err)
|
||||||
|
Loading…
Reference in New Issue
Block a user