From a16ee2f514f50888fb04e586163c64db7ec141ea Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Wed, 12 Apr 2017 22:17:11 -0400 Subject: [PATCH] Implement LRU for AWS device allocator In an AWS environment, when attach fails on the node, let's not use that device from the pool. This makes sure we don't reuse recently freed devices. --- pkg/cloudprovider/providers/aws/aws.go | 13 ++- .../providers/aws/device_allocator.go | 85 ++++++++++++++----- .../providers/aws/device_allocator_test.go | 43 +++++----- 3 files changed, 95 insertions(+), 46 deletions(-) diff --git a/pkg/cloudprovider/providers/aws/aws.go b/pkg/cloudprovider/providers/aws/aws.go index 151938d6730..1909370a1b3 100644 --- a/pkg/cloudprovider/providers/aws/aws.go +++ b/pkg/cloudprovider/providers/aws/aws.go @@ -1281,9 +1281,13 @@ func (c *Cloud) getMountDevice( // we want device names with two significant characters, starting with /dev/xvdbb // the allowed range is /dev/xvd[b-c][a-z] // http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/device_naming.html - deviceAllocator = NewDeviceAllocator(0) + deviceAllocator = NewDeviceAllocator() c.deviceAllocators[i.nodeName] = deviceAllocator } + // We need to lock deviceAllocator to prevent possible race with Deprioritize function + deviceAllocator.Lock() + defer deviceAllocator.Unlock() + chosen, err := deviceAllocator.GetNext(deviceMappings) if err != nil { glog.Warningf("Could not assign a mount device. mappings=%v, error: %v", deviceMappings, err) @@ -1544,7 +1548,9 @@ func (c *Cloud) AttachDisk(diskName KubernetesVolumeID, nodeName types.NodeName, // TODO: Check if the volume was concurrently attached? 
return "", fmt.Errorf("Error attaching EBS volume %q to instance %q: %v", disk.awsID, awsInstance.awsID, err) } - + if da, ok := c.deviceAllocators[awsInstance.nodeName]; ok { + da.Deprioritize(mountDevice) + } glog.V(2).Infof("AttachVolume volume=%q instance=%q request returned %v", disk.awsID, awsInstance.awsID, attachResponse) } @@ -1621,6 +1627,9 @@ func (c *Cloud) DetachDisk(diskName KubernetesVolumeID, nodeName types.NodeName) if err != nil { return "", err } + if da, ok := c.deviceAllocators[awsInstance.nodeName]; ok { + da.Deprioritize(mountDevice) + } if attachment != nil { // We expect it to be nil, it is (maybe) interesting if it is not glog.V(2).Infof("waitForAttachmentStatus returned non-nil attachment with state=detached: %v", attachment) diff --git a/pkg/cloudprovider/providers/aws/device_allocator.go b/pkg/cloudprovider/providers/aws/device_allocator.go index 482d99bf576..490b673bb18 100644 --- a/pkg/cloudprovider/providers/aws/device_allocator.go +++ b/pkg/cloudprovider/providers/aws/device_allocator.go @@ -16,7 +16,11 @@ limitations under the License. package aws -import "fmt" +import ( + "fmt" + "sort" + "sync" +) // ExistingDevices is a map of assigned devices. Presence of a key with a device // name in the map means that the device is allocated. Value is irrelevant and @@ -40,48 +44,87 @@ type DeviceAllocator interface { // name. Only the device suffix is returned, e.g. "ba" for "/dev/xvdba". // It's up to the called to add appropriate "/dev/sd" or "/dev/xvd" prefix. 
GetNext(existingDevices ExistingDevices) (mountDevice, error) + + // Deprioritize the device so that it can't be used again immediately + Deprioritize(mountDevice) + + // Lock the deviceAllocator + Lock() + + // Unlock the deviceAllocator + Unlock() } type deviceAllocator struct { - possibleDevices []mountDevice - lastIndex int + possibleDevices map[mountDevice]int + counter int + deviceLock sync.Mutex } +var _ DeviceAllocator = &deviceAllocator{} + +type devicePair struct { + deviceName mountDevice + deviceIndex int +} + +type devicePairList []devicePair + +func (p devicePairList) Len() int { return len(p) } +func (p devicePairList) Less(i, j int) bool { return p[i].deviceIndex < p[j].deviceIndex } +func (p devicePairList) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + // Allocates device names according to scheme ba..bz, ca..cz // it moves along the ring and always picks next device until // device list is exhausted. -func NewDeviceAllocator(lastIndex int) DeviceAllocator { - possibleDevices := []mountDevice{} +func NewDeviceAllocator() DeviceAllocator { + possibleDevices := make(map[mountDevice]int) for _, firstChar := range []rune{'b', 'c'} { for i := 'a'; i <= 'z'; i++ { dev := mountDevice([]rune{firstChar, i}) - possibleDevices = append(possibleDevices, dev) + possibleDevices[dev] = 0 } } return &deviceAllocator{ possibleDevices: possibleDevices, - lastIndex: lastIndex, + counter: 0, } } +// GetNext gets the next available device from the pool. This function assumes that the caller +// holds the necessary lock on deviceAllocator func (d *deviceAllocator) GetNext(existingDevices ExistingDevices) (mountDevice, error) { - var candidate mountDevice - foundIndex := d.lastIndex - for { - candidate, foundIndex = d.nextDevice(foundIndex + 1) - if _, found := existingDevices[candidate]; !found { - d.lastIndex = foundIndex - return candidate, nil - } - if foundIndex == d.lastIndex { - return "", fmt.Errorf("no devices are available") + for _, devicePair := range d.sortByCount() { 
+ if _, found := existingDevices[devicePair.deviceName]; !found { + return devicePair.deviceName, nil } } + return "", fmt.Errorf("no devices are available") } -func (d *deviceAllocator) nextDevice(nextIndex int) (mountDevice, int) { - if nextIndex < len(d.possibleDevices) { - return d.possibleDevices[nextIndex], nextIndex +func (d *deviceAllocator) sortByCount() devicePairList { + dpl := make(devicePairList, 0) + for deviceName, deviceIndex := range d.possibleDevices { + dpl = append(dpl, devicePair{deviceName, deviceIndex}) + } + sort.Sort(dpl) + return dpl +} + +func (d *deviceAllocator) Lock() { + d.deviceLock.Lock() +} + +func (d *deviceAllocator) Unlock() { + d.deviceLock.Unlock() +} + +// Deprioritize the device so that it can't be used again immediately +func (d *deviceAllocator) Deprioritize(chosen mountDevice) { + d.deviceLock.Lock() + defer d.deviceLock.Unlock() + if _, ok := d.possibleDevices[chosen]; ok { + d.counter++ + d.possibleDevices[chosen] = d.counter } - return d.possibleDevices[0], 0 } diff --git a/pkg/cloudprovider/providers/aws/device_allocator_test.go b/pkg/cloudprovider/providers/aws/device_allocator_test.go index aa377c7b488..5c45dcbf660 100644 --- a/pkg/cloudprovider/providers/aws/device_allocator_test.go +++ b/pkg/cloudprovider/providers/aws/device_allocator_test.go @@ -22,37 +22,22 @@ func TestDeviceAllocator(t *testing.T) { tests := []struct { name string existingDevices ExistingDevices - lastIndex int + deviceMap map[mountDevice]int expectedOutput mountDevice }{ - { - "empty device list", - ExistingDevices{}, - 0, - "bb", - }, { "empty device list with wrap", ExistingDevices{}, - 51, - "ba", // next to 'zz' is the first one, 'ba' - }, - { - "device list", - ExistingDevices{"ba": "used", "bb": "used", "bc": "used"}, - 0, - "bd", - }, - { - "device list with wrap", - ExistingDevices{"cy": "used", "cz": "used", "ba": "used"}, - 49, - "bb", // "cy", "cz" and "ba" are used + generateUnsortedDeviceList(), + "bd", // "bd" has the lowest usage count in the generated device map
}, } for _, test := range tests { - allocator := NewDeviceAllocator(test.lastIndex).(*deviceAllocator) + allocator := NewDeviceAllocator().(*deviceAllocator) + for k, v := range test.deviceMap { + allocator.possibleDevices[k] = v + } got, err := allocator.GetNext(test.existingDevices) if err != nil { @@ -64,8 +49,20 @@ } } +func generateUnsortedDeviceList() map[mountDevice]int { + possibleDevices := make(map[mountDevice]int) + for _, firstChar := range []rune{'b', 'c'} { + for i := 'a'; i <= 'z'; i++ { + dev := mountDevice([]rune{firstChar, i}) + possibleDevices[dev] = 3 + } + } + possibleDevices["bd"] = 0 + return possibleDevices +} + func TestDeviceAllocatorError(t *testing.T) { - allocator := NewDeviceAllocator(0).(*deviceAllocator) + allocator := NewDeviceAllocator().(*deviceAllocator) existingDevices := ExistingDevices{} // make all devices used