AWS EBS: Remove the attached volumes cache

There are known issues with the attached-volume state cache that just aren't
possible to fix with the current interface.

Replace it with a map of the active attach jobs (that was the original
requirement, to avoid a nasty race condition).

This costs us an extra DescribeInstances call on attach/detach, but that
seems worth it if it ends this class of bugs.

Fixes #15073
This commit is contained in:
Justin Santa Barbara 2016-03-10 07:47:30 -05:00
parent d7a87c2285
commit 79b2b7edef

View File

@ -932,9 +932,10 @@ type awsInstance struct {
mutex sync.Mutex
// We must cache because otherwise there is a race condition,
// where we assign a device mapping and then get a second request before we attach the volume
deviceMappings map[mountDevice]string
// We keep an active list of devices we have assigned but not yet
// attached, to avoid a race condition where we assign a device mapping
// and then get a second request before we attach the volume
attaching map[mountDevice]string
}
// newAWSInstance creates a new awsInstance object
@ -953,8 +954,7 @@ func newAWSInstance(ec2Service EC2, instance *ec2.Instance) *awsInstance {
subnetID: aws.StringValue(instance.SubnetId),
}
// We lazy-init deviceMappings
self.deviceMappings = nil
self.attaching = make(map[mountDevice]string)
return self
}
@ -1001,31 +1001,31 @@ func (self *awsInstance) getMountDevice(volumeID string, assign bool) (assigned
self.mutex.Lock()
defer self.mutex.Unlock()
// We cache both for efficiency and correctness
if self.deviceMappings == nil {
info, err := self.describeInstance()
if err != nil {
return "", false, err
info, err := self.describeInstance()
if err != nil {
return "", false, err
}
deviceMappings := map[mountDevice]string{}
for _, blockDevice := range info.BlockDeviceMappings {
name := aws.StringValue(blockDevice.DeviceName)
if strings.HasPrefix(name, "/dev/sd") {
name = name[7:]
}
deviceMappings := map[mountDevice]string{}
for _, blockDevice := range info.BlockDeviceMappings {
name := aws.StringValue(blockDevice.DeviceName)
if strings.HasPrefix(name, "/dev/sd") {
name = name[7:]
}
if strings.HasPrefix(name, "/dev/xvd") {
name = name[8:]
}
if len(name) != 1 {
glog.Warningf("Unexpected EBS DeviceName: %q", aws.StringValue(blockDevice.DeviceName))
}
deviceMappings[mountDevice(name)] = aws.StringValue(blockDevice.Ebs.VolumeId)
if strings.HasPrefix(name, "/dev/xvd") {
name = name[8:]
}
self.deviceMappings = deviceMappings
if len(name) != 1 {
glog.Warningf("Unexpected EBS DeviceName: %q", aws.StringValue(blockDevice.DeviceName))
}
deviceMappings[mountDevice(name)] = aws.StringValue(blockDevice.Ebs.VolumeId)
}
for mountDevice, volume := range self.attaching {
deviceMappings[mountDevice] = volume
}
// Check to see if this volume is already assigned a device on this machine
for mountDevice, mappingVolumeID := range self.deviceMappings {
for mountDevice, mappingVolumeID := range deviceMappings {
if volumeID == mappingVolumeID {
if assign {
glog.Warningf("Got assignment call for already-assigned volume: %s@%s", mountDevice, mappingVolumeID)
@ -1042,7 +1042,7 @@ func (self *awsInstance) getMountDevice(volumeID string, assign bool) (assigned
valid := instanceType.getEBSMountDevices()
chosen := mountDevice("")
for _, mountDevice := range valid {
_, found := self.deviceMappings[mountDevice]
_, found := deviceMappings[mountDevice]
if !found {
chosen = mountDevice
break
@ -1050,31 +1050,31 @@ func (self *awsInstance) getMountDevice(volumeID string, assign bool) (assigned
}
if chosen == "" {
glog.Warningf("Could not assign a mount device (all in use?). mappings=%v, valid=%v", self.deviceMappings, valid)
glog.Warningf("Could not assign a mount device (all in use?). mappings=%v, valid=%v", deviceMappings, valid)
return "", false, nil
}
self.deviceMappings[chosen] = volumeID
self.attaching[chosen] = volumeID
glog.V(2).Infof("Assigned mount device %s -> volume %s", chosen, volumeID)
return chosen, false, nil
}
func (self *awsInstance) releaseMountDevice(volumeID string, mountDevice mountDevice) {
func (self *awsInstance) endAttaching(volumeID string, mountDevice mountDevice) {
self.mutex.Lock()
defer self.mutex.Unlock()
existingVolumeID, found := self.deviceMappings[mountDevice]
existingVolumeID, found := self.attaching[mountDevice]
if !found {
glog.Errorf("releaseMountDevice on non-allocated device")
glog.Errorf("endAttaching on non-allocated device")
return
}
if volumeID != existingVolumeID {
glog.Errorf("releaseMountDevice on device assigned to different volume")
glog.Errorf("endAttaching on device assigned to different volume")
return
}
glog.V(2).Infof("Releasing mount device mapping: %s -> volume %s", mountDevice, volumeID)
delete(self.deviceMappings, mountDevice)
delete(self.attaching, mountDevice)
}
type awsDisk struct {
@ -1280,8 +1280,8 @@ func (c *AWSCloud) AttachDisk(diskName string, instanceName string, readOnly boo
attached := false
defer func() {
if !attached {
awsInstance.releaseMountDevice(disk.awsID, mountDevice)
if attached {
awsInstance.endAttaching(disk.awsID, mountDevice)
}
}()
@ -1346,33 +1346,15 @@ func (aws *AWSCloud) DetachDisk(diskName string, instanceName string) (string, e
return "", errors.New("no response from DetachVolume")
}
// TODO: Fix this - just remove the cache?
// If we don't have a cache; we don't have to wait any more (the driver does it for us)
// Also, maybe we could get the locally connected drivers from the AWS metadata service?
// At this point we are waiting for the volume being detached. This
// releases the volume and invalidates the cache even when there is a timeout.
//
// TODO: A timeout leaves the cache in an inconsistent state. The volume is still
// detaching though the cache shows it as ready to be attached again. Subsequent
// attach operations will fail. The attach is being retried and eventually
// works though. An option would be to completely flush the cache upon timeouts.
//
defer func() {
// TODO: Not thread safe?
for mountDevice, existingVolumeID := range awsInstance.deviceMappings {
if existingVolumeID == disk.awsID {
awsInstance.releaseMountDevice(disk.awsID, mountDevice)
return
}
}
}()
err = disk.waitForAttachmentStatus("detached")
if err != nil {
return "", err
}
if mountDevice != "" {
awsInstance.endAttaching(disk.awsID, mountDevice)
}
hostDevicePath := "/dev/xvd" + string(mountDevice)
return hostDevicePath, err
}