Merge pull request #16053 from saad-ali/attachDetachMutextFix

Fix GCE Cloud/Attach/Detach stability issues
Daniel Smith 2015-10-26 13:05:27 -07:00
commit b07dd73f26
6 changed files with 267 additions and 355 deletions


@@ -55,6 +55,9 @@ const (
gceAffinityTypeClientIP = "CLIENT_IP"
// AffinityTypeClientIPProto - affinity based on Client IP and port.
gceAffinityTypeClientIPProto = "CLIENT_IP_PROTO"
operationPollInterval = 3 * time.Second
operationPollTimeoutDuration = 30 * time.Minute
)
// GCECloud is an implementation of Interface, TCPLoadBalancer and Instances for Google Compute Engine.
@@ -259,48 +262,57 @@ func (gce *GCECloud) targetPoolURL(name, region string) string {
return fmt.Sprintf("https://www.googleapis.com/compute/v1/projects/%s/regions/%s/targetPools/%s", gce.projectID, region, name)
}
func waitForOp(op *compute.Operation, getOperation func() (*compute.Operation, error)) error {
pollOp := op
consecPollFails := 0
for pollOp.Status != "DONE" {
var err error
time.Sleep(3 * time.Second)
pollOp, err = getOperation()
if err != nil {
if consecPollFails == 2 {
// Only bail if we've seen 3 consecutive polling errors.
return err
}
consecPollFails++
} else {
consecPollFails = 0
}
}
if pollOp.Error != nil && len(pollOp.Error.Errors) > 0 {
return &googleapi.Error{
Code: int(pollOp.HttpErrorStatusCode),
Message: pollOp.Error.Errors[0].Message,
}
}
return nil
}
func waitForOp(op *compute.Operation, getOperation func(operationName string) (*compute.Operation, error)) error {
if op == nil {
return fmt.Errorf("operation must not be nil")
}
if opIsDone(op) {
return getErrorFromOp(op)
}
opName := op.Name
return wait.Poll(operationPollInterval, operationPollTimeoutDuration, func() (bool, error) {
pollOp, err := getOperation(opName)
if err != nil {
glog.Warningf("GCE poll operation failed: %v", err)
}
return opIsDone(pollOp), getErrorFromOp(pollOp)
})
}
func opIsDone(op *compute.Operation) bool {
return op != nil && op.Status == "DONE"
}
func getErrorFromOp(op *compute.Operation) error {
if op != nil && op.Error != nil && len(op.Error.Errors) > 0 {
err := &googleapi.Error{
Code: int(op.HttpErrorStatusCode),
Message: op.Error.Errors[0].Message,
}
glog.Errorf("GCE operation failed: %v", err)
return err
}
return nil
}
func (gce *GCECloud) waitForGlobalOp(op *compute.Operation) error {
return waitForOp(op, func() (*compute.Operation, error) {
return gce.service.GlobalOperations.Get(gce.projectID, op.Name).Do()
return waitForOp(op, func(operationName string) (*compute.Operation, error) {
return gce.service.GlobalOperations.Get(gce.projectID, operationName).Do()
})
}
func (gce *GCECloud) waitForRegionOp(op *compute.Operation, region string) error {
return waitForOp(op, func() (*compute.Operation, error) {
return gce.service.RegionOperations.Get(gce.projectID, region, op.Name).Do()
return waitForOp(op, func(operationName string) (*compute.Operation, error) {
return gce.service.RegionOperations.Get(gce.projectID, region, operationName).Do()
})
}
func (gce *GCECloud) waitForZoneOp(op *compute.Operation) error {
return waitForOp(op, func() (*compute.Operation, error) {
return gce.service.ZoneOperations.Get(gce.projectID, gce.zone, op.Name).Do()
return waitForOp(op, func(operationName string) (*compute.Operation, error) {
return gce.service.ZoneOperations.Get(gce.projectID, gce.zone, operationName).Do()
})
}
@@ -1456,20 +1468,9 @@ func (gce *GCECloud) AttachDisk(diskName string, readOnly bool) error {
attachedDisk := gce.convertDiskToAttachedDisk(disk, readWrite)
attachOp, err := gce.service.Instances.AttachDisk(gce.projectID, gce.zone, gce.instanceID, attachedDisk).Do()
if err != nil {
// Check if the disk is already attached to this instance. We do this only
// in the error case, since it is expected to be exceptional.
instance, err := gce.service.Instances.Get(gce.projectID, gce.zone, gce.instanceID).Do()
if err != nil {
return err
}
for _, disk := range instance.Disks {
if disk.Source == attachedDisk.Source {
// Disk is already attached, we're good to go.
return nil
}
}
return err
}
return gce.waitForZoneOp(attachOp)
}
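For orientation, the polling pattern the new waitForOp adopts can be sketched as a small standalone program. This is a simplified analogue only: the real code delegates to wait.Poll from k8s.io/kubernetes/pkg/util/wait with the operationPollInterval and operationPollTimeoutDuration constants above, and the pollUntilDone helper below is a hypothetical stand-in, not part of the change.

package main

import (
	"errors"
	"fmt"
	"time"
)

// pollUntilDone re-checks an operation by name until it reports DONE or the
// timeout elapses. Transient lookup errors are logged and retried, mirroring
// how waitForOp treats getOperation failures above.
func pollUntilDone(interval, timeout time.Duration, name string,
	get func(name string) (status string, err error)) error {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		time.Sleep(interval)
		status, err := get(name)
		if err != nil {
			fmt.Printf("poll for %q failed (will retry): %v\n", name, err)
			continue
		}
		if status == "DONE" {
			return nil
		}
	}
	return errors.New("timed out waiting for operation " + name)
}

func main() {
	calls := 0
	// Fake operation that reports DONE on the third poll.
	get := func(name string) (string, error) {
		calls++
		if calls < 3 {
			return "RUNNING", nil
		}
		return "DONE", nil
	}
	fmt.Println(pollUntilDone(10*time.Millisecond, time.Second, "op-1", get))
}

Tolerating intermittent lookup failures for the full timeout window, rather than bailing after three consecutive errors as the old loop did, is the behavioral change this hunk makes.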


@@ -0,0 +1,82 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package keymutex
import (
"fmt"
"github.com/golang/glog"
"sync"
)
// KeyMutex is a thread-safe interface for acquiring locks on arbitrary strings.
type KeyMutex interface {
// Acquires a lock associated with the specified ID, creating the lock if one doesn't already exist.
LockKey(id string)
// Releases the lock associated with the specified ID.
// Returns an error if the specified ID doesn't exist.
UnlockKey(id string) error
}
// Returns a new instance of a key mutex.
func NewKeyMutex() KeyMutex {
return &keyMutex{
mutexMap: make(map[string]*sync.Mutex),
}
}
type keyMutex struct {
sync.RWMutex
mutexMap map[string]*sync.Mutex
}
// Acquires a lock associated with the specified ID (creates the lock if one doesn't already exist).
func (km *keyMutex) LockKey(id string) {
glog.V(5).Infof("LockKey(...) called for id %q\r\n", id)
mutex := km.getOrCreateLock(id)
mutex.Lock()
glog.V(5).Infof("LockKey(...) for id %q completed.\r\n", id)
}
// Releases the lock associated with the specified ID.
// Returns an error if the specified ID doesn't exist.
func (km *keyMutex) UnlockKey(id string) error {
glog.V(5).Infof("UnlockKey(...) called for id %q\r\n", id)
km.RLock()
defer km.RUnlock()
mutex, exists := km.mutexMap[id]
if !exists {
return fmt.Errorf("id %q not found", id)
}
glog.V(5).Infof("UnlockKey(...) for id. Mutex found, trying to unlock it. %q\r\n", id)
mutex.Unlock()
glog.V(5).Infof("UnlockKey(...) for id %q completed.\r\n", id)
return nil
}
// Returns lock associated with the specified ID, or creates the lock if one doesn't already exist.
func (km *keyMutex) getOrCreateLock(id string) *sync.Mutex {
km.Lock()
defer km.Unlock()
if _, exists := km.mutexMap[id]; !exists {
km.mutexMap[id] = &sync.Mutex{}
}
return km.mutexMap[id]
}
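To make the semantics concrete, here is a short usage sketch for the new keymutex package. The disk IDs and sleeps are hypothetical and purely illustrative: locks on the same key serialize, locks on different keys do not.

package main

import (
	"fmt"
	"time"

	"k8s.io/kubernetes/pkg/util/keymutex"
)

func main() {
	km := keymutex.NewKeyMutex()

	// Hold the lock for "disk-a" in a background goroutine,
	// simulating a slow detach operation.
	go func() {
		km.LockKey("disk-a")
		defer km.UnlockKey("disk-a")
		time.Sleep(100 * time.Millisecond)
		fmt.Println("detach of disk-a finished")
	}()
	time.Sleep(10 * time.Millisecond) // let the goroutine acquire the lock first

	// A different key is not blocked by the detach above.
	km.LockKey("disk-b")
	fmt.Println("attach of disk-b proceeds immediately")
	km.UnlockKey("disk-b")

	// The same key blocks until the pending detach completes.
	km.LockKey("disk-a")
	fmt.Println("attach of disk-a ran only after the detach")
	km.UnlockKey("disk-a")
}

This per-key serialization is exactly the property the gce_util.go changes below rely on to keep attach and detach for the same PD atomic while leaving other PDs unaffected.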


@@ -0,0 +1,111 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package keymutex
import (
"testing"
"time"
)
const (
callbackTimeout = 1 * time.Second
)
func Test_SingleLock_NoUnlock(t *testing.T) {
// Arrange
km := NewKeyMutex()
key := "fakeid"
callbackCh := make(chan interface{})
// Act
go lockAndCallback(km, key, callbackCh)
// Assert
verifyCallbackHappens(t, callbackCh)
}
func Test_SingleLock_SingleUnlock(t *testing.T) {
// Arrange
km := NewKeyMutex()
key := "fakeid"
callbackCh := make(chan interface{})
// Act & Assert
go lockAndCallback(km, key, callbackCh)
verifyCallbackHappens(t, callbackCh)
km.UnlockKey(key)
}
func Test_DoubleLock_DoubleUnlock(t *testing.T) {
// Arrange
km := NewKeyMutex()
key := "fakeid"
callbackCh1stLock := make(chan interface{})
callbackCh2ndLock := make(chan interface{})
// Act & Assert
go lockAndCallback(km, key, callbackCh1stLock)
verifyCallbackHappens(t, callbackCh1stLock)
go lockAndCallback(km, key, callbackCh2ndLock)
verifyCallbackDoesntHappen(t, callbackCh2ndLock)
km.UnlockKey(key)
verifyCallbackHappens(t, callbackCh2ndLock)
km.UnlockKey(key)
}
func lockAndCallback(km KeyMutex, id string, callbackCh chan<- interface{}) {
km.LockKey(id)
callbackCh <- true
}
func verifyCallbackHappens(t *testing.T, callbackCh <-chan interface{}) bool {
select {
case <-callbackCh:
return true
case <-time.After(callbackTimeout):
t.Fatalf("Timed out waiting for callback.")
return false
}
}
func verifyCallbackDoesntHappen(t *testing.T, callbackCh <-chan interface{}) bool {
select {
case <-callbackCh:
t.Fatalf("Unexpected callback.")
return false
case <-time.After(callbackTimeout):
return true
}
}
func verifyNoError(t *testing.T, err error, name string) {
if err != nil {
t.Fatalf("Unexpected response on %q. Expected: <no error> Actual: <%v>", name, err)
}
}
func verifyError(t *testing.T, err error, name string) {
if err == nil {
t.Fatalf("Unexpected response on %q. Expected: <error> Actual: <no error>", name)
}
}
func verifyMsg(t *testing.T, expected, actual string) {
if actual != expected {
t.Fatalf("Unexpected testMsg value. Expected: <%v> Actual: <%v>", expected, actual)
}
}


@@ -1,103 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package operationmanager
import (
"fmt"
"sync"
)
// Operation Manager is a thread-safe interface for keeping track of multiple pending async operations.
type OperationManager interface {
// Called when the operation with the given ID has started.
// Creates a new channel with specified buffer size tracked with the specified ID.
// Returns a read-only version of the newly created channel.
// Returns an error if an entry with the specified ID already exists (previous entry must be removed by calling Close).
Start(id string, bufferSize uint) (<-chan interface{}, error)
// Called when the operation with the given ID has terminated.
// Closes and removes the channel associated with ID.
// Returns an error if no associated channel exists.
Close(id string) error
// Attempts to send msg to the channel associated with ID.
// Returns an error if no associated channel exists.
Send(id string, msg interface{}) error
// Returns true if an entry with the specified ID already exists.
Exists(id string) bool
}
// Returns a new instance of a channel manager.
func NewOperationManager() OperationManager {
return &operationManager{
chanMap: make(map[string]chan interface{}),
}
}
type operationManager struct {
sync.RWMutex
chanMap map[string]chan interface{}
}
// Called when the operation with the given ID has started.
// Creates a new channel with specified buffer size tracked with the specified ID.
// Returns a read-only version of the newly created channel.
// Returns an error if an entry with the specified ID already exists (previous entry must be removed by calling Close).
func (cm *operationManager) Start(id string, bufferSize uint) (<-chan interface{}, error) {
cm.Lock()
defer cm.Unlock()
if _, exists := cm.chanMap[id]; exists {
return nil, fmt.Errorf("id %q already exists", id)
}
cm.chanMap[id] = make(chan interface{}, bufferSize)
return cm.chanMap[id], nil
}
// Called when the operation with the given ID has terminated.
// Closes and removes the channel associated with ID.
// Returns an error if no associated channel exists.
func (cm *operationManager) Close(id string) error {
cm.Lock()
defer cm.Unlock()
if _, exists := cm.chanMap[id]; !exists {
return fmt.Errorf("id %q not found", id)
}
close(cm.chanMap[id])
delete(cm.chanMap, id)
return nil
}
// Attempts to send msg to the channel associated with ID.
// Returns an error if no associated channel exists.
func (cm *operationManager) Send(id string, msg interface{}) error {
cm.RLock()
defer cm.RUnlock()
if _, exists := cm.chanMap[id]; !exists {
return fmt.Errorf("id %q not found", id)
}
cm.chanMap[id] <- msg
return nil
}
// Returns true if an entry with the specified ID already exists.
func (cm *operationManager) Exists(id string) (exists bool) {
cm.RLock()
defer cm.RUnlock()
_, exists = cm.chanMap[id]
return
}
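For contrast with the keymutex replacement, the rendezvous this removed package provided worked roughly as follows: with bufferSize 0, Send blocks until the operation's goroutine receives, which is how callers used to wait on a pending detach. A minimal sketch against the interface above, as it existed before this commit:

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/util/operationmanager" // removed by this commit
)

func main() {
	cm := operationmanager.NewOperationManager()

	// Register a pending "detach" with an unbuffered channel, so that
	// Send blocks until the detach goroutine receives from it.
	ch, err := cm.Start("pd-1", 0 /* bufferSize */)
	if err != nil {
		panic(err)
	}

	done := make(chan struct{})
	go func() {
		<-ch // rendezvous point: unblocks the Send below
		cm.Close("pd-1")
		close(done)
	}()

	// A concurrent attach would block here until the detach side receives.
	if err := cm.Send("pd-1", true); err != nil {
		fmt.Println("no pending detach:", err)
	}
	<-done
	fmt.Println("pending detach acknowledged")
}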


@@ -1,159 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Channel Manager keeps track of multiple channels
package operationmanager
import (
"testing"
)
func TestStart(t *testing.T) {
// Arrange
cm := NewOperationManager()
chanId := "testChanId"
testMsg := "test message"
// Act
ch, startErr := cm.Start(chanId, 1 /* bufferSize */)
sigErr := cm.Send(chanId, testMsg)
// Assert
verifyNoError(t, startErr, "Start")
verifyNoError(t, sigErr, "Send")
actualMsg := <-ch
verifyMsg(t, testMsg /* expected */, actualMsg.(string) /* actual */)
}
func TestStartIdExists(t *testing.T) {
// Arrange
cm := NewOperationManager()
chanId := "testChanId"
// Act
_, startErr1 := cm.Start(chanId, 1 /* bufferSize */)
_, startErr2 := cm.Start(chanId, 1 /* bufferSize */)
// Assert
verifyNoError(t, startErr1, "Start1")
verifyError(t, startErr2, "Start2")
}
func TestStartAndAdd2Chans(t *testing.T) {
// Arrange
cm := NewOperationManager()
chanId1 := "testChanId1"
chanId2 := "testChanId2"
testMsg1 := "test message 1"
testMsg2 := "test message 2"
// Act
ch1, startErr1 := cm.Start(chanId1, 1 /* bufferSize */)
ch2, startErr2 := cm.Start(chanId2, 1 /* bufferSize */)
sigErr1 := cm.Send(chanId1, testMsg1)
sigErr2 := cm.Send(chanId2, testMsg2)
// Assert
verifyNoError(t, startErr1, "Start1")
verifyNoError(t, startErr2, "Start2")
verifyNoError(t, sigErr1, "Send1")
verifyNoError(t, sigErr2, "Send2")
actualMsg1 := <-ch1
actualMsg2 := <-ch2
verifyMsg(t, testMsg1 /* expected */, actualMsg1.(string) /* actual */)
verifyMsg(t, testMsg2 /* expected */, actualMsg2.(string) /* actual */)
}
func TestStartAndAdd2ChansAndClose(t *testing.T) {
// Arrange
cm := NewOperationManager()
chanId1 := "testChanId1"
chanId2 := "testChanId2"
testMsg1 := "test message 1"
testMsg2 := "test message 2"
// Act
ch1, startErr1 := cm.Start(chanId1, 1 /* bufferSize */)
ch2, startErr2 := cm.Start(chanId2, 1 /* bufferSize */)
sigErr1 := cm.Send(chanId1, testMsg1)
sigErr2 := cm.Send(chanId2, testMsg2)
cm.Close(chanId1)
sigErr3 := cm.Send(chanId1, testMsg1)
// Assert
verifyNoError(t, startErr1, "Start1")
verifyNoError(t, startErr2, "Start2")
verifyNoError(t, sigErr1, "Send1")
verifyNoError(t, sigErr2, "Send2")
verifyError(t, sigErr3, "Send3")
actualMsg1 := <-ch1
actualMsg2 := <-ch2
verifyMsg(t, testMsg1 /* expected */, actualMsg1.(string) /* actual */)
verifyMsg(t, testMsg2 /* expected */, actualMsg2.(string) /* actual */)
}
func TestExists(t *testing.T) {
// Arrange
cm := NewOperationManager()
chanId1 := "testChanId1"
chanId2 := "testChanId2"
// Act & Assert
verifyExists(t, cm, chanId1, false /* expected */)
verifyExists(t, cm, chanId2, false /* expected */)
_, startErr1 := cm.Start(chanId1, 1 /* bufferSize */)
verifyNoError(t, startErr1, "Start1")
verifyExists(t, cm, chanId1, true /* expected */)
verifyExists(t, cm, chanId2, false /* expected */)
_, startErr2 := cm.Start(chanId2, 1 /* bufferSize */)
verifyNoError(t, startErr2, "Start2")
verifyExists(t, cm, chanId1, true /* expected */)
verifyExists(t, cm, chanId2, true /* expected */)
cm.Close(chanId1)
verifyExists(t, cm, chanId1, false /* expected */)
verifyExists(t, cm, chanId2, true /* expected */)
cm.Close(chanId2)
verifyExists(t, cm, chanId1, false /* expected */)
verifyExists(t, cm, chanId2, false /* expected */)
}
func verifyExists(t *testing.T, cm OperationManager, id string, expected bool) {
if actual := cm.Exists(id); expected != actual {
t.Fatalf("Unexpected Exists(%q) response. Expected: <%v> Actual: <%v>", id, expected, actual)
}
}
func verifyNoError(t *testing.T, err error, name string) {
if err != nil {
t.Fatalf("Unexpected response on %q. Expected: <no error> Actual: <%v>", name, err)
}
}
func verifyError(t *testing.T, err error, name string) {
if err == nil {
t.Fatalf("Unexpected response on %q. Expected: <error> Actual: <no error>", name)
}
}
func verifyMsg(t *testing.T, expected, actual string) {
if actual != expected {
t.Fatalf("Unexpected testMsg value. Expected: <%v> Actual: <%v>", expected, actual)
}
}


@@ -29,7 +29,7 @@ import (
gcecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/gce"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/util/exec"
"k8s.io/kubernetes/pkg/util/operationmanager"
"k8s.io/kubernetes/pkg/util/keymutex"
"k8s.io/kubernetes/pkg/util/sets"
)
@@ -46,18 +46,21 @@ const (
errorSleepDuration = 5 * time.Second
)
// Singleton operation manager for managing detach clean up go routines
var detachCleanupManager = operationmanager.NewOperationManager()
// Singleton key mutex for keeping attach/detach operations for the same PD atomic
var attachDetachMutex = keymutex.NewKeyMutex()
type GCEDiskUtil struct{}
// Attaches a disk specified by a volume.GCEPersistentDisk to the current kubelet.
// Mounts the disk to its global path.
func (diskUtil *GCEDiskUtil) AttachAndMountDisk(b *gcePersistentDiskBuilder, globalPDPath string) error {
glog.V(5).Infof("AttachAndMountDisk(b, %q) where b is %#v\r\n", globalPDPath, b)
glog.V(5).Infof("AttachAndMountDisk(...) called for PD %q. Will block for existing operations, if any. (globalPDPath=%q)\r\n", b.pdName, globalPDPath)
// Block execution until any pending detach goroutines for this pd have completed
detachCleanupManager.Send(b.pdName, true)
// Block execution until any pending detach operations for this PD have completed
attachDetachMutex.LockKey(b.pdName)
defer attachDetachMutex.UnlockKey(b.pdName)
glog.V(5).Infof("AttachAndMountDisk(...) called for PD %q. Awake and ready to execute. (globalPDPath=%q)\r\n", b.pdName, globalPDPath)
sdBefore, err := filepath.Glob(diskSDPattern)
if err != nil {
@@ -98,24 +101,13 @@ func (diskUtil *GCEDiskUtil) AttachAndMountDisk(b *gcePersistentDiskBuilder, glo
// Unmounts the device and detaches the disk from the kubelet's host machine.
func (util *GCEDiskUtil) DetachDisk(c *gcePersistentDiskCleaner) error {
// Unmount the global PD mount, which should be the only one.
globalPDPath := makeGlobalPDName(c.plugin.host, c.pdName)
glog.V(5).Infof("DetachDisk(c) where c is %#v and the globalPDPath is %q\r\n", c, globalPDPath)
if err := c.mounter.Unmount(globalPDPath); err != nil {
return err
}
if err := os.Remove(globalPDPath); err != nil {
return err
}
if detachCleanupManager.Exists(c.pdName) {
glog.Warningf("Terminating new DetachDisk call for GCE PD %q. A previous detach call for this PD is still pending.", c.pdName)
return nil
}
// Detach disk, retry if needed.
glog.V(5).Infof("DetachDisk(...) for PD %q\r\n", c.pdName)
if err := unmountPDAndRemoveGlobalPath(c); err != nil {
glog.Errorf("Error unmounting PD %q: %v", c.pdName, err)
}
// Detach disk asynchronously so that the kubelet sync loop is not blocked.
go detachDiskAndVerify(c)
return nil
}
@@ -125,9 +117,6 @@ func attachDiskAndVerify(b *gcePersistentDiskBuilder, sdBeforeSet sets.String) (
devicePaths := getDiskByIdPaths(b.gcePersistentDisk)
var gceCloud *gcecloud.GCECloud
for numRetries := 0; numRetries < maxRetries; numRetries++ {
// Block execution until any pending detach goroutines for this pd have completed
detachCleanupManager.Send(b.pdName, true)
var err error
if gceCloud == nil {
gceCloud, err = getCloudProvider()
@@ -140,11 +129,10 @@ }
}
if numRetries > 0 {
glog.Warningf("Timed out waiting for GCE PD %q to attach. Retrying attach.", b.pdName)
glog.Warningf("Retrying attach for GCE PD %q (retry count=%v).", b.pdName, numRetries)
}
if err := gceCloud.AttachDisk(b.pdName, b.readOnly); err != nil {
// Retry on error. See issue #11321.
glog.Errorf("Error attaching PD %q: %v", b.pdName, err)
time.Sleep(errorSleepDuration)
continue
@@ -190,33 +178,15 @@ func verifyDevicePath(devicePaths []string, sdBeforeSet sets.String) (string, er
// Detaches the specified persistent disk device from node, verifies that it is detached, and retries if it fails.
// This function is intended to be called asynchronously as a goroutine.
// It starts the detachCleanupManager with the specified pdName so that callers can wait for completion.
func detachDiskAndVerify(c *gcePersistentDiskCleaner) {
glog.V(5).Infof("detachDiskAndVerify for pd %q.", c.pdName)
glog.V(5).Infof("detachDiskAndVerify(...) for pd %q. Will block for pending operations", c.pdName)
defer util.HandleCrash()
// Start operation, so that other threads can wait on this detach operation.
// Set bufferSize to 0 so senders are blocked on send until we receive.
ch, err := detachCleanupManager.Start(c.pdName, 0 /* bufferSize */)
if err != nil {
glog.Errorf("Error adding %q to detachCleanupManager: %v", c.pdName, err)
return
}
defer detachCleanupManager.Close(c.pdName)
defer func() {
// Unblock any callers that have been waiting for this detach routine to complete.
for {
select {
case <-ch:
glog.V(5).Infof("detachDiskAndVerify for pd %q clearing chan.", c.pdName)
default:
glog.V(5).Infof("detachDiskAndVerify for pd %q done clearing chans.", c.pdName)
return
}
}
}()
// Block execution until any pending attach/detach operations for this PD have completed
attachDetachMutex.LockKey(c.pdName)
defer attachDetachMutex.UnlockKey(c.pdName)
glog.V(5).Infof("detachDiskAndVerify(...) for pd %q. Awake and ready to execute.", c.pdName)
devicePaths := getDiskByIdPaths(c.gcePersistentDisk)
var gceCloud *gcecloud.GCECloud
@@ -233,13 +203,13 @@ func detachDiskAndVerify(c *gcePersistentDiskCleaner) {
}
if numRetries > 0 {
glog.Warningf("Timed out waiting for GCE PD %q to detach. Retrying detach.", c.pdName)
glog.Warningf("Retrying detach for GCE PD %q (retry count=%v).", c.pdName, numRetries)
}
if err := gceCloud.DetachDisk(c.pdName); err != nil {
// Retry on error. See issue #11321. Continue and verify if disk is detached, because a
// previous detach operation may still succeed.
glog.Errorf("Error detaching PD %q: %v", c.pdName, err)
time.Sleep(errorSleepDuration)
continue
}
for numChecks := 0; numChecks < maxChecks; numChecks++ {
@@ -249,6 +219,7 @@ func detachDiskAndVerify(c *gcePersistentDiskCleaner) {
glog.Errorf("Error verifying GCE PD (%q) is detached: %v", c.pdName, err)
} else if allPathsRemoved {
// All paths to the PD have been successfully removed
unmountPDAndRemoveGlobalPath(c)
glog.Infof("Successfully detached GCE PD %q.", c.pdName)
return
}
@@ -263,6 +234,15 @@ func detachDiskAndVerify(c *gcePersistentDiskCleaner) {
glog.Errorf("Failed to detach GCE PD %q. One or more mount paths was not removed.", c.pdName)
}
// Unmount the global PD mount, which should be the only one, and delete it.
func unmountPDAndRemoveGlobalPath(c *gcePersistentDiskCleaner) error {
globalPDPath := makeGlobalPDName(c.plugin.host, c.pdName)
err := c.mounter.Unmount(globalPDPath)
os.Remove(globalPDPath)
return err
}
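The retry-and-verify structure shared by attachDiskAndVerify and detachDiskAndVerify condenses to the sketch below. The constants and the detach/detached callbacks are illustrative stand-ins, not the real helpers or values in this file:

package main

import (
	"fmt"
	"time"
)

// Stand-in constants; the real values live in gce_util.go.
const (
	maxRetries         = 5
	maxChecks          = 10
	checkSleepDuration = 10 * time.Millisecond
	errorSleepDuration = 10 * time.Millisecond
)

// detachAndVerify issues a detach, then polls until the device paths are
// gone, retrying the whole detach on error or verification timeout.
func detachAndVerify(detach func() error, detached func() (bool, error)) error {
	for numRetries := 0; numRetries < maxRetries; numRetries++ {
		if err := detach(); err != nil {
			// Retry on error; a previous detach may still succeed.
			time.Sleep(errorSleepDuration)
			continue
		}
		for numChecks := 0; numChecks < maxChecks; numChecks++ {
			if ok, err := detached(); err == nil && ok {
				return nil
			}
			time.Sleep(checkSleepDuration)
		}
	}
	return fmt.Errorf("timed out waiting for detach")
}

func main() {
	checks := 0
	err := detachAndVerify(
		func() error { return nil },                            // fake detach that always succeeds
		func() (bool, error) { checks++; return checks > 2, nil }, // detached after a few checks
	)
	fmt.Println(err) // <nil>
}

Retrying the detach even after an apparent failure is deliberate: as the comment above notes, a previous detach operation may still succeed.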
// Returns the first path that exists, or empty string if none exist.
func verifyAllPathsRemoved(devicePaths []string) (bool, error) {
allPathsRemoved := true