Merge pull request #40417 from jsravn/fix-reconciler-external-updates-race

Automatic merge from submit-queue (batch tested with PRs 41531, 40417, 41434)

Always detach volumes in operator executor

**What this PR does / why we need it**:

Instead of marking a volume as detached immediately in Kubelet's
reconciler, delegate the marking asynchronously to the operator
executor. This is necessary to prevent race conditions with other
operations mutating the same volume state.

An example of one such problem:

1. pod is created, volume is added to desired state of the world
2. reconciler process starts
3. reconciler starts MountVolume, which is kicked off asynchronously via
   operation_executor.go
4. MountVolume mounts the volume, but hasn't yet marked it as mounted
5. pod is deleted, volume is removed from desired state of the world
6. reconciler reaches detach volume section, detects volume is no longer in desired state of world,
   removes it from volumes in use
7. MountVolume tries to mark mount, throws an error because
   volume is no longer in actual state of world list. After this, kubelet isn't aware of the mount
   so doesn't try to unmount again.
8. controller-manager tries to detach the volume, this fails because it
   is still mounted to the OS.
9. EBS gets stuck indefinitely in busy state trying to detach.



**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #32881, fixes ##37854 (maybe)

**Special notes for your reviewer**:

**Release note**:

```release-note
```
This commit is contained in:
Kubernetes Submit Queue 2017-02-15 23:01:07 -08:00 committed by GitHub
commit ddf4a0cad5
3 changed files with 18 additions and 14 deletions

View File

@ -314,7 +314,9 @@ func (rc *reconciler) reconcile() {
// Ensure devices that should be detached/unmounted are detached/unmounted.
for _, attachedVolume := range rc.actualStateOfWorld.GetUnmountedVolumes() {
if !rc.desiredStateOfWorld.VolumeExists(attachedVolume.VolumeName) {
// Check IsOperationPending to avoid marking a volume as detached if it's in the process of mounting.
if !rc.desiredStateOfWorld.VolumeExists(attachedVolume.VolumeName) &&
!rc.operationExecutor.IsOperationPending(attachedVolume.VolumeName, nestedpendingoperations.EmptyUniquePodName) {
if attachedVolume.GloballyMounted {
// Volume is globally mounted to device, unmount it
glog.V(12).Infof("Attempting to start UnmountDevice for volume %q (spec.Name: %q)",
@ -341,11 +343,13 @@ func (rc *reconciler) reconcile() {
}
} else {
// Volume is attached to node, detach it
// Kubelet not responsible for detaching or this volume has a non-attachable volume plugin.
if rc.controllerAttachDetachEnabled || !attachedVolume.PluginIsAttachable {
// Kubelet not responsible for detaching or this volume has a non-attachable volume plugin,
// so just remove it to actualStateOfWorld without attach.
rc.actualStateOfWorld.MarkVolumeAsDetached(
attachedVolume.VolumeName, rc.nodeName)
rc.actualStateOfWorld.MarkVolumeAsDetached(attachedVolume.VolumeName, attachedVolume.NodeName)
glog.Infof("Detached volume %q (spec.Name: %q) devicePath: %q",
attachedVolume.VolumeName,
attachedVolume.VolumeSpec.Name(),
attachedVolume.DevicePath)
} else {
// Only detach if kubelet detach is enabled
glog.V(12).Infof("Attempting to start DetachVolume for volume %q (spec.Name: %q)",

View File

@ -36,11 +36,11 @@ import (
)
const (
// emptyUniquePodName is a UniquePodName for empty string.
emptyUniquePodName types.UniquePodName = types.UniquePodName("")
// EmptyUniquePodName is a UniquePodName for empty string.
EmptyUniquePodName types.UniquePodName = types.UniquePodName("")
// emptyUniqueVolumeName is a UniqueVolumeName for empty string
emptyUniqueVolumeName v1.UniqueVolumeName = v1.UniqueVolumeName("")
// EmptyUniqueVolumeName is a UniqueVolumeName for empty string
EmptyUniqueVolumeName v1.UniqueVolumeName = v1.UniqueVolumeName("")
)
// NestedPendingOperations defines the supported set of operations.
@ -160,7 +160,7 @@ func (grm *nestedPendingOperations) isOperationExists(
podName types.UniquePodName) (bool, int) {
// If volumeName is empty, operation can be executed concurrently
if volumeName == emptyUniqueVolumeName {
if volumeName == EmptyUniqueVolumeName {
return false, -1
}
@ -170,8 +170,8 @@ func (grm *nestedPendingOperations) isOperationExists(
continue
}
if previousOp.podName != emptyUniquePodName &&
podName != emptyUniquePodName &&
if previousOp.podName != EmptyUniquePodName &&
podName != EmptyUniquePodName &&
previousOp.podName != podName {
// No match, keep searching
continue
@ -274,7 +274,7 @@ func (grm *nestedPendingOperations) Wait() {
func getOperationName(
volumeName v1.UniqueVolumeName, podName types.UniquePodName) string {
podNameStr := ""
if podName != emptyUniquePodName {
if podName != EmptyUniquePodName {
podNameStr = fmt.Sprintf(" (%q)", podName)
}

View File

@ -411,7 +411,7 @@ func (oe *operationExecutor) MountVolume(
return err
}
podName := volumetypes.UniquePodName("")
podName := nestedpendingoperations.EmptyUniquePodName
// TODO: remove this -- not necessary
if !volumeToMount.PluginIsAttachable {
// Non-attachable volume plugins can execute mount for multiple pods