diff --git a/pkg/cloudprovider/providers/aws/BUILD b/pkg/cloudprovider/providers/aws/BUILD index df3f0f800f6..8c731825177 100644 --- a/pkg/cloudprovider/providers/aws/BUILD +++ b/pkg/cloudprovider/providers/aws/BUILD @@ -16,6 +16,7 @@ go_library( "aws_loadbalancer.go", "aws_routes.go", "aws_utils.go", + "device_allocator.go", "log_handler.go", "retry_handler.go", "sets_ippermissions.go", @@ -50,6 +51,7 @@ go_test( name = "go_default_test", srcs = [ "aws_test.go", + "device_allocator_test.go", "retry_handler_test.go", ], library = "go_default_library", diff --git a/pkg/cloudprovider/providers/aws/aws.go b/pkg/cloudprovider/providers/aws/aws.go index c5d4ab626d9..aedcc056e6d 100644 --- a/pkg/cloudprovider/providers/aws/aws.go +++ b/pkg/cloudprovider/providers/aws/aws.go @@ -365,6 +365,9 @@ type Cloud struct { // and then get a second request before we attach the volume attachingMutex sync.Mutex attaching map[types.NodeName]map[mountDevice]awsVolumeID + + // state of our device allocator for each node + deviceAllocators map[types.NodeName]DeviceAllocator } var _ Volumes = &Cloud{} @@ -796,7 +799,8 @@ func newAWSCloud(config io.Reader, awsServices Services) (*Cloud, error) { cfg: cfg, region: regionName, - attaching: make(map[types.NodeName]map[mountDevice]awsVolumeID), + attaching: make(map[types.NodeName]map[mountDevice]awsVolumeID), + deviceAllocators: make(map[types.NodeName]DeviceAllocator), } selfAWSInstance, err := awsCloud.buildSelfAWSInstance() @@ -1210,20 +1214,17 @@ func (c *Cloud) getMountDevice(i *awsInstance, volumeID awsVolumeID, assign bool return mountDevice(""), false, nil } - // Find the first unused device in sequence 'ba', 'bb', 'bc', ... 'bz', 'ca', ... 'zz' - var chosen mountDevice - for first := 'b'; first <= 'z' && chosen == ""; first++ { - for second := 'a'; second <= 'z' && chosen == ""; second++ { - candidate := mountDevice(fmt.Sprintf("%c%c", first, second)) - if _, found := deviceMappings[candidate]; !found { - chosen = candidate - break - } - } + // Find the next unused device name + deviceAllocator := c.deviceAllocators[i.nodeName] + if deviceAllocator == nil { + // we want device names with two significant characters, starting with + // /dev/xvdba (leaving xvda - xvdz and xvdaa-xvdaz to the system) + deviceAllocator = NewDeviceAllocator(2, "ba") + c.deviceAllocators[i.nodeName] = deviceAllocator } - - if chosen == "" { - glog.Warningf("Could not assign a mount device (all in use?). mappings=%v", deviceMappings) + chosen, err := deviceAllocator.GetNext(deviceMappings) + if err != nil { + glog.Warningf("Could not assign a mount device. mappings=%v, error: %v", deviceMappings, err) return "", false, fmt.Errorf("Too many EBS volumes attached to node %s.", i.nodeName) } diff --git a/pkg/cloudprovider/providers/aws/device_allocator.go b/pkg/cloudprovider/providers/aws/device_allocator.go new file mode 100644 index 00000000000..ad78f931ad1 --- /dev/null +++ b/pkg/cloudprovider/providers/aws/device_allocator.go @@ -0,0 +1,95 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package aws + +import "fmt" + +// ExistingDevices is a map of assigned devices. Presence of a key with a device +// name in the map means that the device is allocated. Value is irrelevant and +// can be used for anything that DeviceAllocator user wants. +// Only the relevant part of device name should be in the map, e.g. "ba" for +// "/dev/xvdba". +type ExistingDevices map[mountDevice]awsVolumeID + +// On AWS, we should assign new (not yet used) device names to attached volumes. +// If we reuse a previously used name, we may get the volume "attaching" forever, +// see https://aws.amazon.com/premiumsupport/knowledge-center/ebs-stuck-attaching/. +// DeviceAllocator finds available device name, taking into account already +// assigned device names from ExistingDevices map. It tries to find the next +// device name to the previously assigned one (from previous DeviceAllocator +// call), so all available device names are used eventually and it minimizes +// device name reuse. +// All these allocations are in-memory, nothing is written to / read from +// /dev directory. +type DeviceAllocator interface { + // GetNext returns a free device name or error when there is no free device + // name. Only the device suffix is returned, e.g. "ba" for "/dev/xvdba". + // It's up to the called to add appropriate "/dev/sd" or "/dev/xvd" prefix. + GetNext(existingDevices ExistingDevices) (mountDevice, error) +} + +type deviceAllocator struct { + firstDevice mountDevice + lastAssignedDevice mountDevice + length int +} + +// NewDeviceAllocator creates new DeviceAlllocator that allocates device names +// of given length ("aaa" for length 3) and with given first device, so all +// devices before the first device are left to the operating system. +// With length 2 and firstDevice "ba", it will allocate device names +// ba, bb, ..., bz, ca, ... cz, ..., da, ... zz, so a..z and aa..az can be used +// by the operating system. +func NewDeviceAllocator(length int, firstDevice mountDevice) DeviceAllocator { + lastDevice := make([]byte, length) + for i := 0; i < length; i++ { + lastDevice[i] = 'z' + } + return &deviceAllocator{ + firstDevice: firstDevice, + lastAssignedDevice: mountDevice(lastDevice), + length: length, + } +} + +func (d *deviceAllocator) GetNext(existingDevices ExistingDevices) (mountDevice, error) { + candidate := d.lastAssignedDevice + + for { + candidate = d.nextDevice(candidate) + if _, found := existingDevices[candidate]; !found { + d.lastAssignedDevice = candidate + return candidate, nil + } + if candidate == d.lastAssignedDevice { + return "", fmt.Errorf("no devices are available") + } + } +} + +func (d *deviceAllocator) nextDevice(device mountDevice) mountDevice { + dev := []byte(device) + for i := d.length - 1; i >= 0; i-- { + if dev[i] != 'z' { + dev[i]++ + return mountDevice(dev) + } + dev[i] = 'a' + } + // all parts of device were 'z', jump to the first device + return d.firstDevice +} diff --git a/pkg/cloudprovider/providers/aws/device_allocator_test.go b/pkg/cloudprovider/providers/aws/device_allocator_test.go new file mode 100644 index 00000000000..801e2c4ebed --- /dev/null +++ b/pkg/cloudprovider/providers/aws/device_allocator_test.go @@ -0,0 +1,103 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package aws + +import "testing" + +func TestDeviceAllocator(t *testing.T) { + tests := []struct { + name string + existingDevices ExistingDevices + length int + firstDevice mountDevice + lastAllocated mountDevice + expectedOutput mountDevice + }{ + { + "empty device list", + ExistingDevices{}, + 2, + "aa", + "aa", + "ab", + }, + { + "empty device list with wrap", + ExistingDevices{}, + 2, + "ba", + "zz", + "ba", // next to 'zz' is the first one, 'ba' + }, + { + "device list", + ExistingDevices{"aa": "used", "ab": "used", "ac": "used"}, + 2, + "aa", + "aa", + "ad", // all up to "ac" are used + }, + { + "device list with wrap", + ExistingDevices{"zy": "used", "zz": "used", "ba": "used"}, + 2, + "ba", + "zx", + "bb", // "zy", "zz" and "ba" are used + }, + { + "three characters with wrap", + ExistingDevices{"zzy": "used", "zzz": "used", "baa": "used"}, + 3, + "baa", + "zzx", + "bab", + }, + } + + for _, test := range tests { + allocator := NewDeviceAllocator(test.length, test.firstDevice).(*deviceAllocator) + allocator.lastAssignedDevice = test.lastAllocated + + got, err := allocator.GetNext(test.existingDevices) + if err != nil { + t.Errorf("text %q: unexpected error: %v", test.name, err) + } + if got != test.expectedOutput { + t.Errorf("text %q: expected %q, got %q", test.name, test.expectedOutput, got) + } + } +} + +func TestDeviceAllocatorError(t *testing.T) { + allocator := NewDeviceAllocator(2, "ba").(*deviceAllocator) + existingDevices := ExistingDevices{} + + // make all devices used + var first, second byte + for first = 'b'; first <= 'z'; first++ { + for second = 'a'; second <= 'z'; second++ { + device := [2]byte{first, second} + existingDevices[mountDevice(device[:])] = "used" + } + } + + device, err := allocator.GetNext(existingDevices) + if err == nil { + t.Errorf("expected error, got device %q", device) + } +}