GCE PD attach/detach operations should poll to verify successful completion
Commit: 7771151767
Parent: c317020922
@@ -1079,7 +1079,8 @@ func (gce *GCECloud) AttachDisk(diskName string, readOnly bool) error {
 		readWrite = "READ_ONLY"
 	}
 	attachedDisk := gce.convertDiskToAttachedDisk(disk, readWrite)
-	_, err = gce.service.Instances.AttachDisk(gce.projectID, gce.zone, gce.instanceID, attachedDisk).Do()
+
+	attachOp, err := gce.service.Instances.AttachDisk(gce.projectID, gce.zone, gce.instanceID, attachedDisk).Do()
 	if err != nil {
 		// Check if the disk is already attached to this instance. We do this only
 		// in the error case, since it is expected to be exceptional.
@@ -1093,14 +1094,18 @@ func (gce *GCECloud) AttachDisk(diskName string, readOnly bool) error {
 				return nil
 			}
 		}
 
 	}
-	return err
+	return gce.waitForZoneOp(attachOp)
 }
 
 func (gce *GCECloud) DetachDisk(devicePath string) error {
-	_, err := gce.service.Instances.DetachDisk(gce.projectID, gce.zone, gce.instanceID, devicePath).Do()
-	return err
+	detachOp, err := gce.service.Instances.DetachDisk(gce.projectID, gce.zone, gce.instanceID, devicePath).Do()
+	if err != nil {
+		return err
+	}
+
+	return gce.waitForZoneOp(detachOp)
 }
 
 func (gce *GCECloud) getDisk(diskName string) (*compute.Disk, error) {
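Both new return paths hand the pending operation to gce.waitForZoneOp, which lives elsewhere in this file and is not part of the diff. As a rough sketch of what such a zone-operation poller generally looks like (the method body, poll interval, and error formatting below are assumptions, not the commit's code; it reuses the gce.service, gce.projectID, and gce.zone fields visible in the call sites above and needs the compute/v1, time, and fmt imports):

// Sketch only: poll the zone operation until the API reports DONE, then
// surface any operation-level error. Assumed shape, not part of this commit.
func (gce *GCECloud) waitForZoneOp(op *compute.Operation) error {
	pollInterval := 3 * time.Second // assumed interval
	for op.Status != "DONE" {
		time.Sleep(pollInterval)
		var err error
		op, err = gce.service.ZoneOperations.Get(gce.projectID, gce.zone, op.Name).Do()
		if err != nil {
			return err
		}
	}
	if op.Error != nil && len(op.Error.Errors) > 0 {
		// The operation completed but reported an error (e.g. quota or an invalid disk).
		return fmt.Errorf("operation %q failed: %v", op.Name, op.Error.Errors[0].Message)
	}
	return nil
}

This is what "poll to verify successful completion" buys over the old code: the old AttachDisk/DetachDisk returned as soon as the API accepted the request, before the disk was actually attached or detached.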
@@ -40,7 +40,7 @@ const (
 	diskPartitionSuffix = "-part"
 	diskSDPath = "/dev/sd"
 	diskSDPattern = "/dev/sd*"
-	maxChecks = 10
+	maxChecks = 60
 	maxRetries = 10
 	checkSleepDuration = time.Second
 	errorSleepDuration = 5 * time.Second
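The inner verification loop sleeps checkSleepDuration between checks (see the "Sleep then check again" comment further down), so raising maxChecks from 10 to 60 stretches each verification pass from roughly 10 seconds to roughly 60 seconds per retry. A standalone back-of-the-envelope check (not part of the commit):

package main

import (
	"fmt"
	"time"
)

const (
	maxChecks          = 60
	checkSleepDuration = time.Second
)

func main() {
	// Upper bound on how long one verification pass waits before the outer
	// retry loop attempts the attach/detach again.
	fmt.Println(maxChecks * checkSleepDuration) // prints "1m0s"
}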
@@ -123,15 +123,14 @@ func (util *GCEDiskUtil) DetachDisk(c *gcePersistentDiskCleaner) error {
 // Attaches the specified persistent disk device to node, verifies that it is attached, and retries if it fails.
 func attachDiskAndVerify(b *gcePersistentDiskBuilder, sdBeforeSet sets.String) (string, error) {
 	devicePaths := getDiskByIdPaths(b.gcePersistentDisk)
-	var gce cloudprovider.Interface
+	var gceCloud *gce_cloud.GCECloud
 	for numRetries := 0; numRetries < maxRetries; numRetries++ {
-		if gce == nil {
-			var err error
-			gce, err = cloudprovider.GetCloudProvider("gce", nil)
-			if err != nil || gce == nil {
+		var err error
+		if gceCloud == nil {
+			gceCloud, err = getCloudProvider()
+			if err != nil || gceCloud == nil {
 				// Retry on error. See issue #11321
-				glog.Errorf("Error getting GCECloudProvider while attaching PD %q: %v", b.pdName, err)
-				gce = nil
+				glog.Errorf("Error getting GCECloudProvider while detaching PD %q: %v", b.pdName, err)
 				time.Sleep(errorSleepDuration)
 				continue
 			}
@@ -141,27 +140,21 @@ func attachDiskAndVerify(b *gcePersistentDiskBuilder, sdBeforeSet sets.String) (
 			glog.Warningf("Timed out waiting for GCE PD %q to attach. Retrying attach.", b.pdName)
 		}
 
-		if err := gce.(*gce_cloud.GCECloud).AttachDisk(b.pdName, b.readOnly); err != nil {
+		if err := gceCloud.AttachDisk(b.pdName, b.readOnly); err != nil {
 			// Retry on error. See issue #11321. Continue and verify if disk is attached, because a
 			// previous attach operation may still succeed.
 			glog.Errorf("Error attaching PD %q: %v", b.pdName, err)
 		}
 
 		for numChecks := 0; numChecks < maxChecks; numChecks++ {
-			if err := udevadmChangeToNewDrives(sdBeforeSet); err != nil {
-				// udevadm errors should not block disk attachment, log and continue
-				glog.Errorf("%v", err)
-			}
-
-			for _, path := range devicePaths {
-				if pathExists, err := pathExists(path); err != nil {
-					// Retry on error. See issue #11321
-					glog.Errorf("Error checking if path exists: %v", err)
-				} else if pathExists {
-					// A device path has successfully been created for the PD
-					glog.Infof("Successfully attached GCE PD %q.", b.pdName)
-					return path, nil
-				}
+			path, err := verifyDevicePath(devicePaths, sdBeforeSet)
+			if err != nil {
+				// Log error, if any, and continue checking periodically. See issue #11321
+				glog.Errorf("Error verifying GCE PD (%q) is attached: %v", b.pdName, err)
+			} else if path != "" {
+				// A device path has successfully been created for the PD
+				glog.Infof("Successfully attached GCE PD %q.", b.pdName)
+				return path, nil
 			}
 
 			// Sleep then check again
@@ -173,6 +166,24 @@ func attachDiskAndVerify(b *gcePersistentDiskBuilder, sdBeforeSet sets.String) (
 	return "", fmt.Errorf("Could not attach GCE PD %q. Timeout waiting for mount paths to be created.", b.pdName)
 }
 
+// Returns the first path that exists, or empty string if none exist.
+func verifyDevicePath(devicePaths []string, sdBeforeSet sets.String) (string, error) {
+	if err := udevadmChangeToNewDrives(sdBeforeSet); err != nil {
+		// udevadm errors should not block disk detachment, log and continue
+		glog.Errorf("udevadmChangeToNewDrives failed with: %v", err)
+	}
+
+	for _, path := range devicePaths {
+		if pathExists, err := pathExists(path); err != nil {
+			return "", fmt.Errorf("Error checking if path exists: %v", err)
+		} else if pathExists {
+			return path, nil
+		}
+	}
+
+	return "", nil
+}
+
 // Detaches the specified persistent disk device from node, verifies that it is detached, and retries if it fails.
 // This function is intended to be called asynchronously as a go routine.
 // It starts the detachCleanupManager with the specified pdName so that callers can wait for completion.
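verifyDevicePath polls the /dev/disk/by-id symlinks returned by getDiskByIdPaths, which is not shown in this hunk. Purely as an illustration of the kind of paths being checked (hypothetical values; the real prefixes and partition suffixes come from getDiskByIdPaths), a PD named "my-data-disk" would typically be probed at symlinks such as:

// Hypothetical example only; the actual slice is built by getDiskByIdPaths.
devicePaths := []string{
	"/dev/disk/by-id/google-my-data-disk",
	"/dev/disk/by-id/scsi-0Google_PersistentDisk_my-data-disk",
}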
@@ -204,15 +215,14 @@ func detachDiskAndVerify(c *gcePersistentDiskCleaner) {
 	}()
 
 	devicePaths := getDiskByIdPaths(c.gcePersistentDisk)
-	var gce cloudprovider.Interface
+	var gceCloud *gce_cloud.GCECloud
 	for numRetries := 0; numRetries < maxRetries; numRetries++ {
-		if gce == nil {
-			var err error
-			gce, err = cloudprovider.GetCloudProvider("gce", nil)
-			if err != nil || gce == nil {
+		var err error
+		if gceCloud == nil {
+			gceCloud, err = getCloudProvider()
+			if err != nil || gceCloud == nil {
 				// Retry on error. See issue #11321
 				glog.Errorf("Error getting GCECloudProvider while detaching PD %q: %v", c.pdName, err)
-				gce = nil
 				time.Sleep(errorSleepDuration)
 				continue
 			}
@@ -222,27 +232,18 @@ func detachDiskAndVerify(c *gcePersistentDiskCleaner) {
 			glog.Warningf("Timed out waiting for GCE PD %q to detach. Retrying detach.", c.pdName)
 		}
 
-		if err := gce.(*gce_cloud.GCECloud).DetachDisk(c.pdName); err != nil {
+		if err := gceCloud.DetachDisk(c.pdName); err != nil {
 			// Retry on error. See issue #11321. Continue and verify if disk is detached, because a
 			// previous detach operation may still succeed.
 			glog.Errorf("Error detaching PD %q: %v", c.pdName, err)
 		}
 
 		for numChecks := 0; numChecks < maxChecks; numChecks++ {
-			allPathsRemoved := true
-			for _, path := range devicePaths {
-				if err := udevadmChangeToDrive(path); err != nil {
-					// udevadm errors should not block disk detachment, log and continue
-					glog.Errorf("%v", err)
-				}
-				if exists, err := pathExists(path); err != nil {
-					// Retry on error. See issue #11321
-					glog.Errorf("Error checking if path exists: %v", err)
-				} else {
-					allPathsRemoved = allPathsRemoved && !exists
-				}
-			}
-			if allPathsRemoved {
+			allPathsRemoved, err := verifyAllPathsRemoved(devicePaths)
+			if err != nil {
+				// Log error, if any, and continue checking periodically.
+				glog.Errorf("Error verifying GCE PD (%q) is detached: %v", c.pdName, err)
+			} else if allPathsRemoved {
 				// All paths to the PD have been succefully removed
 				glog.Infof("Successfully detached GCE PD %q.", c.pdName)
 				return
@@ -258,6 +259,24 @@ func detachDiskAndVerify(c *gcePersistentDiskCleaner) {
 	glog.Errorf("Failed to detach GCE PD %q. One or more mount paths was not removed.", c.pdName)
 }
 
+// Returns the first path that exists, or empty string if none exist.
+func verifyAllPathsRemoved(devicePaths []string) (bool, error) {
+	allPathsRemoved := true
+	for _, path := range devicePaths {
+		if err := udevadmChangeToDrive(path); err != nil {
+			// udevadm errors should not block disk detachment, log and continue
+			glog.Errorf("%v", err)
+		}
+		if exists, err := pathExists(path); err != nil {
+			return false, fmt.Errorf("Error checking if path exists: %v", err)
+		} else {
+			allPathsRemoved = allPathsRemoved && !exists
+		}
+	}
+
+	return allPathsRemoved, nil
+}
+
 // Returns list of all /dev/disk/by-id/* paths for given PD.
 func getDiskByIdPaths(pd *gcePersistentDisk) []string {
 	devicePaths := []string{
@@ -286,6 +305,17 @@ func pathExists(path string) (bool, error) {
 	}
 }
 
+// Return cloud provider
+func getCloudProvider() (*gce_cloud.GCECloud, error) {
+	gceCloudProvider, err := cloudprovider.GetCloudProvider("gce", nil)
+	if err != nil || gceCloudProvider == nil {
+		return nil, err
+	}
+
+	// The conversion must be safe otherwise bug in GetCloudProvider()
+	return gceCloudProvider.(*gce_cloud.GCECloud), nil
+}
+
 // Calls "udevadm trigger --action=change" for newly created "/dev/sd*" drives (exist only in after set).
 // This is workaround for Issue #7972. Once the underlying issue has been resolved, this may be removed.
 func udevadmChangeToNewDrives(sdBeforeSet sets.String) error {
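getCloudProvider relies on the comment that the type assertion must be safe; a mismatched provider would panic. If a caller ever wanted to fail gracefully instead, a comma-ok variant would look like the following (alternative sketch, not part of the commit; the name getCloudProviderSafe is made up, and it assumes the same cloudprovider, gce_cloud, and fmt imports already used in this file):

func getCloudProviderSafe() (*gce_cloud.GCECloud, error) {
	provider, err := cloudprovider.GetCloudProvider("gce", nil)
	if err != nil || provider == nil {
		return nil, err
	}

	// Comma-ok assertion: return an error instead of panicking if the
	// registered "gce" provider is not the expected implementation.
	gceCloud, ok := provider.(*gce_cloud.GCECloud)
	if !ok {
		return nil, fmt.Errorf("unexpected cloud provider type %T registered for %q", provider, "gce")
	}
	return gceCloud, nil
}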