Merge pull request #31978 from jsafrane/detach-before-delete

Automatic merge from submit-queue

Do not report error when deleting an attached volume

Persistent volume controller should not send warning events to a PV and mark the PV as failed when the volume is still attached.

This happens when a user quickly deletes a pod and associated PVC - PV is slowly detaching, while the PVC is already deleted and the PV enters Failed phase.

`Deleter.Delete` can now return `deletedVolumeInUseError`, which is sent as INFO to the PV to let the user know we did not forget to delete the PV, however the PV stays in Released state. The controller tries again in the next sync (15 seconds by default).

Fixes #31511
This commit is contained in:
Kubernetes Submit Queue 2016-09-25 18:55:32 -07:00 committed by GitHub
commit 4785f6f517
9 changed files with 104 additions and 7 deletions

View File

@ -1401,6 +1401,9 @@ func (d *awsDisk) deleteVolume() (bool, error) {
if awsError.Code() == "InvalidVolume.NotFound" {
return false, nil
}
if awsError.Code() == "VolumeInUse" {
return false, volume.NewDeletedVolumeInUseError(err.Error())
}
}
return false, fmt.Errorf("error deleting EBS volumes: %v", err)
}

View File

@ -40,6 +40,7 @@ import (
netsets "k8s.io/kubernetes/pkg/util/net/sets"
"k8s.io/kubernetes/pkg/util/sets"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/pkg/volume"
"cloud.google.com/go/compute/metadata"
"github.com/golang/glog"
@ -2459,7 +2460,7 @@ func (gce *GCECloud) CreateDisk(name string, diskType string, zone string, sizeG
return gce.waitForZoneOp(createOp, zone)
}
func (gce *GCECloud) DeleteDisk(diskToDelete string) error {
func (gce *GCECloud) doDeleteDisk(diskToDelete string) error {
disk, err := gce.getDiskByNameUnknownZone(diskToDelete)
if err != nil {
return err
@ -2473,6 +2474,30 @@ func (gce *GCECloud) DeleteDisk(diskToDelete string) error {
return gce.waitForZoneOp(deleteOp, disk.Zone)
}
// DeleteDisk deletes the named persistent disk. If the disk is still in use
// (attached to an instance), the GCE API error is translated into a
// volume.DeletedVolumeInUseError so the volume controller retries the
// deletion later instead of marking the volume as Failed.
func (gce *GCECloud) DeleteDisk(diskToDelete string) error {
	if err := gce.doDeleteDisk(diskToDelete); err != nil {
		if isGCEError(err, "resourceInUseByAnotherResource") {
			return volume.NewDeletedVolumeInUseError(err.Error())
		}
		return err
	}
	return nil
}
// isGCEError reports whether err is a *googleapi.Error that carries the given
// reason (e.g. "resourceInUseByAnotherResource") among its error items.
func isGCEError(err error, reason string) bool {
	apiErr, ok := err.(*googleapi.Error)
	if !ok {
		return false
	}
	for i := range apiErr.Errors {
		if apiErr.Errors[i].Reason == reason {
			return true
		}
	}
	return false
}
// Builds the labels that should be automatically added to a PersistentVolume backed by a GCE PD
// Specifically, this builds FailureDomain (zone) and Region labels.
// The PersistentVolumeLabel admission controller calls this and adds the labels when a PV is created.

View File

@ -23,6 +23,8 @@ import (
"path"
"strings"
"k8s.io/kubernetes/pkg/volume"
"github.com/rackspace/gophercloud"
"github.com/rackspace/gophercloud/openstack"
"github.com/rackspace/gophercloud/openstack/blockstorage/v1/volumes"
@ -182,6 +184,15 @@ func (os *OpenStack) GetDevicePath(diskId string) string {
}
func (os *OpenStack) DeleteVolume(volumeName string) error {
used, err := os.diskIsUsed(volumeName)
if err != nil {
return err
}
if used {
msg := fmt.Sprintf("Cannot delete the volume %q, it's still attached to a node", volumeName)
return volume.NewDeletedVolumeInUseError(msg)
}
sClient, err := openstack.NewBlockStorageV1(os.provider, gophercloud.EndpointOpts{
Region: os.region,
})
@ -228,3 +239,15 @@ func (os *OpenStack) DiskIsAttached(diskName, instanceID string) (bool, error) {
}
return false, nil
}
// diskIsUsed reports whether the named disk has at least one attachment,
// i.e. it is still attached to some node.
func (os *OpenStack) diskIsUsed(diskName string) (bool, error) {
	disk, err := os.getVolume(diskName)
	if err != nil {
		return false, err
	}
	return len(disk.Attachments) > 0, nil
}

View File

@ -1070,11 +1070,20 @@ func (ctrl *PersistentVolumeController) deleteVolumeOperation(arg interface{}) {
if err != nil {
// Delete failed, update the volume and emit an event.
glog.V(3).Infof("deletion of volume %q failed: %v", volume.Name, err)
if _, err = ctrl.updateVolumePhaseWithEvent(volume, api.VolumeFailed, api.EventTypeWarning, "VolumeFailedDelete", err.Error()); err != nil {
glog.V(4).Infof("deleteVolumeOperation [%s]: failed to mark volume as failed: %v", volume.Name, err)
// Save failed, retry on the next deletion attempt
return
if vol.IsDeletedVolumeInUse(err) {
// The plugin needs more time, don't mark the volume as Failed
// and send Normal event only
ctrl.eventRecorder.Event(volume, api.EventTypeNormal, "VolumeDelete", err.Error())
} else {
// The plugin failed, mark the volume as Failed and send Warning
// event
if _, err = ctrl.updateVolumePhaseWithEvent(volume, api.VolumeFailed, api.EventTypeWarning, "VolumeFailedDelete", err.Error()); err != nil {
glog.V(4).Infof("deleteVolumeOperation [%s]: failed to mark volume as failed: %v", volume.Name, err)
// Save failed, retry on the next deletion attempt
return
}
}
// Despite the volume being Failed, the controller will retry deleting
// the volume in every syncVolume() call.
return
@ -1168,7 +1177,7 @@ func (ctrl *PersistentVolumeController) doDeleteVolume(volume *api.PersistentVol
if err = deleter.Delete(); err != nil {
// Deleter failed
return false, fmt.Errorf("Delete of volume %q failed: %v", volume.Name, err)
return false, err
}
glog.V(2).Infof("volume %q deleted", volume.Name)

View File

@ -77,7 +77,7 @@ func TestDeleteSync(t *testing.T) {
// delete failure - delete() returns error
"8-5 - delete returns error",
newVolumeArray("volume8-5", "1Gi", "uid8-5", "claim8-5", api.VolumeBound, api.PersistentVolumeReclaimDelete),
withMessage("Delete of volume \"volume8-5\" failed: Mock delete error", newVolumeArray("volume8-5", "1Gi", "uid8-5", "claim8-5", api.VolumeFailed, api.PersistentVolumeReclaimDelete)),
withMessage("Mock delete error", newVolumeArray("volume8-5", "1Gi", "uid8-5", "claim8-5", api.VolumeFailed, api.PersistentVolumeReclaimDelete)),
noclaims,
noclaims,
[]string{"Warning VolumeFailedDelete"}, noerrors,

View File

@ -49,6 +49,8 @@ func (util *AWSDiskUtil) DeleteVolume(d *awsElasticBlockStoreDeleter) error {
deleted, err := cloud.DeleteDisk(d.volumeID)
if err != nil {
// AWS cloud provider returns volume.deletedVolumeInUseError when
// necessary, no handling needed here.
glog.V(2).Infof("Error deleting EBS Disk volume %s: %v", d.volumeID, err)
return err
}

View File

@ -124,6 +124,8 @@ func (util *CinderDiskUtil) DeleteVolume(cd *cinderVolumeDeleter) error {
}
if err = cloud.DeleteVolume(cd.pdName); err != nil {
// OpenStack cloud provider returns volume.deletedVolumeInUseError when
// necessary, no handling needed here.
glog.V(2).Infof("Error deleting cinder volume %s: %v", cd.pdName, err)
return err
}

View File

@ -60,6 +60,8 @@ func (util *GCEDiskUtil) DeleteVolume(d *gcePersistentDiskDeleter) error {
if err = cloud.DeleteDisk(d.pdName); err != nil {
glog.V(2).Infof("Error deleting GCE PD volume %s: %v", d.pdName, err)
// GCE cloud provider returns volume.deletedVolumeInUseError when
// necessary, no handling needed here.
return err
}
glog.V(2).Infof("Successfully deleted GCE PD volume %s", d.pdName)

View File

@ -129,6 +129,12 @@ type Provisioner interface {
// Deleter removes the resource backing a volume from the underlying storage.
type Deleter interface {
	Volume
	// Delete removes the volume from the underlying storage. This method
	// should block until completion.
	//
	// deletedVolumeInUseError returned from this function will not be
	// reported as error and it will be sent as "Info" event to the PV being
	// deleted. The volume controller will retry deleting the volume in the
	// next periodic sync. This can be used to postpone deletion of a volume
	// that is being detached from a node. Deletion of such volume would fail
	// anyway and such error would confuse users.
	Delete() error
}
@ -171,6 +177,31 @@ type Detacher interface {
UnmountDevice(deviceMountPath string) error
}
// deletedVolumeInUseError signals that a volume scheduled for deletion is
// still attached to a node; the string value carries the human-readable
// message for the PV event.
type deletedVolumeInUseError string

// Compile-time check that deletedVolumeInUseError implements error.
var _ error = deletedVolumeInUseError("")

// NewDeletedVolumeInUseError returns a new instance of
// deletedVolumeInUseError with the given message.
func NewDeletedVolumeInUseError(message string) error {
	return deletedVolumeInUseError(message)
}

// IsDeletedVolumeInUse returns true if an error returned from Delete() is
// deletedVolumeInUseError.
func IsDeletedVolumeInUse(err error) bool {
	_, inUse := err.(deletedVolumeInUseError)
	return inUse
}

// Error implements the error interface.
func (err deletedVolumeInUseError) Error() string {
	return string(err)
}
func RenameDirectory(oldPath, newName string) (string, error) {
newPath, err := ioutil.TempDir(path.Dir(oldPath), newName)
if err != nil {