Merge pull request #88444 from andyzhangx/azuredisk-remediator

fix: add remediation in azure disk attach/detach
This commit is contained in:
Kubernetes Prow Robot 2020-02-24 11:03:04 -08:00 committed by GitHub
commit 327dd9de0f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 200 additions and 19 deletions

View File

@ -21,6 +21,7 @@ package azure
import (
"context"
"fmt"
"net/http"
"path"
"regexp"
"strings"
@ -345,6 +346,48 @@ func filterDetachingDisks(unfilteredDisks []compute.DataDisk) []compute.DataDisk
return filteredDisks
}
func (c *controllerCommon) filterNonExistingDisks(ctx context.Context, unfilteredDisks []compute.DataDisk) []compute.DataDisk {
filteredDisks := []compute.DataDisk{}
for _, disk := range unfilteredDisks {
filter := false
if disk.ManagedDisk != nil && disk.ManagedDisk.ID != nil {
diskURI := *disk.ManagedDisk.ID
exist, err := c.cloud.checkDiskExists(ctx, diskURI)
if err != nil {
klog.Errorf("checkDiskExists(%s) failed with error: %v", diskURI, err)
} else {
// only filter disk when checkDiskExists returns <false, nil>
filter = !exist
if filter {
klog.Errorf("disk(%s) does not exist, removed from data disk list", diskURI)
}
}
}
if !filter {
filteredDisks = append(filteredDisks, disk)
}
}
return filteredDisks
}
func (c *controllerCommon) checkDiskExists(ctx context.Context, diskURI string) (bool, error) {
diskName := path.Base(diskURI)
resourceGroup, err := getResourceGroupFromDiskURI(diskURI)
if err != nil {
return false, err
}
if _, rerr := c.cloud.DisksClient.Get(ctx, resourceGroup, diskName); rerr != nil {
if rerr.HTTPStatusCode == http.StatusNotFound {
return false, nil
}
return false, rerr.Error()
}
return true, nil
}
func getValidCreationData(subscriptionID, resourceGroup, sourceResourceID, sourceType string) (compute.CreationData, error) {
if sourceResourceID == "" {
return compute.CreationData{

View File

@ -427,3 +427,119 @@ func TestGetValidCreationData(t *testing.T) {
}
}
}
func TestCheckDiskExists(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
ctx, cancel := getContextWithCancel()
defer cancel()
testCloud := GetTestCloud(ctrl)
common := &controllerCommon{
location: testCloud.Location,
storageEndpointSuffix: testCloud.Environment.StorageEndpointSuffix,
resourceGroup: testCloud.ResourceGroup,
subscriptionID: testCloud.SubscriptionID,
cloud: testCloud,
vmLockMap: newLockMap(),
}
// create a new disk before running test
newDiskName := "newdisk"
newDiskURI := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Compute/disks/%s",
testCloud.SubscriptionID, testCloud.ResourceGroup, newDiskName)
fDC := newFakeDisksClient()
rerr := fDC.CreateOrUpdate(ctx, testCloud.ResourceGroup, newDiskName, compute.Disk{})
assert.Equal(t, rerr == nil, true, "return error: %v", rerr)
testCloud.DisksClient = fDC
testCases := []struct {
diskURI string
expectedResult bool
expectedErr bool
}{
{
diskURI: "incorrect disk URI format",
expectedResult: false,
expectedErr: true,
},
{
diskURI: "/subscriptions/xxx/resourceGroups/xxx/providers/Microsoft.Compute/disks/non-existing-disk",
expectedResult: false,
expectedErr: false,
},
{
diskURI: newDiskURI,
expectedResult: true,
expectedErr: false,
},
}
for i, test := range testCases {
exist, err := common.checkDiskExists(ctx, test.diskURI)
assert.Equal(t, test.expectedResult, exist, "TestCase[%d]", i, exist)
assert.Equal(t, test.expectedErr, err != nil, "TestCase[%d], return error: %v", i, err)
}
}
func TestFilterNonExistingDisks(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
ctx, cancel := getContextWithCancel()
defer cancel()
testCloud := GetTestCloud(ctrl)
common := &controllerCommon{
location: testCloud.Location,
storageEndpointSuffix: testCloud.Environment.StorageEndpointSuffix,
resourceGroup: testCloud.ResourceGroup,
subscriptionID: testCloud.SubscriptionID,
cloud: testCloud,
vmLockMap: newLockMap(),
}
// create a new disk before running test
diskURIPrefix := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Compute/disks/",
testCloud.SubscriptionID, testCloud.ResourceGroup)
newDiskName := "newdisk"
newDiskURI := diskURIPrefix + newDiskName
fDC := newFakeDisksClient()
rerr := fDC.CreateOrUpdate(ctx, testCloud.ResourceGroup, newDiskName, compute.Disk{})
assert.Equal(t, rerr == nil, true, "return error: %v", rerr)
testCloud.DisksClient = fDC
disks := []compute.DataDisk{
{
Name: &newDiskName,
ManagedDisk: &compute.ManagedDiskParameters{
ID: &newDiskURI,
},
},
{
Name: pointer.StringPtr("DiskName2"),
ManagedDisk: &compute.ManagedDiskParameters{
ID: pointer.StringPtr(diskURIPrefix + "DiskName2"),
},
},
{
Name: pointer.StringPtr("DiskName3"),
ManagedDisk: &compute.ManagedDiskParameters{
ID: pointer.StringPtr(diskURIPrefix + "DiskName3"),
},
},
{
Name: pointer.StringPtr("DiskName4"),
ManagedDisk: &compute.ManagedDiskParameters{
ID: pointer.StringPtr(diskURIPrefix + "DiskName4"),
},
},
}
filteredDisks := common.filterNonExistingDisks(ctx, disks)
assert.Equal(t, 1, len(filteredDisks))
assert.Equal(t, newDiskName, *filteredDisks[0].Name)
disks = []compute.DataDisk{}
filteredDisks = filterDetachingDisks(disks)
assert.Equal(t, 0, len(filteredDisks))
}

View File

@ -19,6 +19,7 @@ limitations under the License.
package azure
import (
"net/http"
"strings"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute"
@ -98,18 +99,18 @@ func (as *availabilitySet) AttachDisk(isManagedDisk bool, diskName, diskURI stri
rerr := as.VirtualMachinesClient.Update(ctx, nodeResourceGroup, vmName, newVM, "attach_disk")
if rerr != nil {
klog.Errorf("azureDisk - attach disk(%s, %s) failed, err: %v", diskName, diskURI, rerr)
detail := rerr.Error().Error()
if strings.Contains(detail, errLeaseFailed) || strings.Contains(detail, errDiskBlobNotFound) {
// if lease cannot be acquired or disk not found, immediately detach the disk and return the original error
klog.V(2).Infof("azureDisk - err %v, try detach disk(%s, %s)", rerr, diskName, diskURI)
as.DetachDisk(diskName, diskURI, nodeName)
}
klog.Errorf("azureDisk - attach disk(%s, %s) on rg(%s) vm(%s) failed, err: %v", diskName, diskURI, nodeResourceGroup, vmName, rerr)
if rerr.HTTPStatusCode == http.StatusNotFound {
klog.Errorf("azureDisk - begin to filterNonExistingDisks(%s, %s) on rg(%s) vm(%s)", diskName, diskURI, nodeResourceGroup, vmName)
disks := as.filterNonExistingDisks(ctx, *newVM.VirtualMachineProperties.StorageProfile.DataDisks)
newVM.VirtualMachineProperties.StorageProfile.DataDisks = &disks
if rerr = as.VirtualMachinesClient.Update(ctx, nodeResourceGroup, vmName, newVM, "attach_disk"); rerr != nil {
return rerr.Error()
}
}
}
klog.V(2).Infof("azureDisk - attach disk(%s, %s) succeeded", diskName, diskURI)
klog.V(2).Infof("azureDisk - update(%s): vm(%s) - attach disk(%s, %s) succeeded", nodeResourceGroup, vmName, diskName, diskURI)
return nil
}
@ -166,9 +167,18 @@ func (as *availabilitySet) DetachDisk(diskName, diskURI string, nodeName types.N
rerr := as.VirtualMachinesClient.Update(ctx, nodeResourceGroup, vmName, newVM, "detach_disk")
if rerr != nil {
klog.Errorf("azureDisk - detach disk(%s, %s) on rg(%s) vm(%s) failed, err: %v", diskName, diskURI, nodeResourceGroup, vmName, rerr)
if rerr.HTTPStatusCode == http.StatusNotFound {
klog.Errorf("azureDisk - begin to filterNonExistingDisks(%s, %s) on rg(%s) vm(%s)", diskName, diskURI, nodeResourceGroup, vmName)
disks := as.filterNonExistingDisks(ctx, *vm.StorageProfile.DataDisks)
newVM.VirtualMachineProperties.StorageProfile.DataDisks = &disks
if rerr = as.VirtualMachinesClient.Update(ctx, nodeResourceGroup, vmName, newVM, "detach_disk"); rerr != nil {
return rerr.Error()
}
}
}
klog.V(2).Infof("azureDisk - update(%s): vm(%s) - detach disk(%s, %s) succeeded", nodeResourceGroup, vmName, diskName, diskURI)
return nil
}

View File

@ -19,6 +19,7 @@ limitations under the License.
package azure
import (
"net/http"
"strings"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute"
@ -103,17 +104,18 @@ func (ss *scaleSet) AttachDisk(isManagedDisk bool, diskName, diskURI string, nod
klog.V(2).Infof("azureDisk - update(%s): vm(%s) - attach disk(%s, %s) with DiskEncryptionSetID(%s)", nodeResourceGroup, nodeName, diskName, diskURI, diskEncryptionSetID)
rerr := ss.VirtualMachineScaleSetVMsClient.Update(ctx, nodeResourceGroup, ssName, instanceID, newVM, "attach_disk")
if rerr != nil {
detail := rerr.Error().Error()
if strings.Contains(detail, errLeaseFailed) || strings.Contains(detail, errDiskBlobNotFound) {
// if lease cannot be acquired or disk not found, immediately detach the disk and return the original error
klog.Infof("azureDisk - err %s, try detach disk(%s, %s)", detail, diskName, diskURI)
ss.DetachDisk(diskName, diskURI, nodeName)
}
klog.Errorf("azureDisk - attach disk(%s, %s) on rg(%s) vm(%s) failed, err: %v", diskName, diskURI, nodeResourceGroup, nodeName, rerr)
if rerr.HTTPStatusCode == http.StatusNotFound {
klog.Errorf("azureDisk - begin to filterNonExistingDisks(%s, %s) on rg(%s) vm(%s)", diskName, diskURI, nodeResourceGroup, nodeName)
disks := ss.filterNonExistingDisks(ctx, *newVM.VirtualMachineScaleSetVMProperties.StorageProfile.DataDisks)
newVM.VirtualMachineScaleSetVMProperties.StorageProfile.DataDisks = &disks
if rerr = ss.VirtualMachineScaleSetVMsClient.Update(ctx, nodeResourceGroup, ssName, instanceID, newVM, "attach_disk"); rerr != nil {
return rerr.Error()
}
}
}
klog.V(2).Infof("azureDisk - attach disk(%s, %s) succeeded", diskName, diskURI)
klog.V(2).Infof("azureDisk - update(%s): vm(%s) - attach disk(%s, %s) succeeded", nodeResourceGroup, nodeName, diskName, diskURI)
return nil
}
@ -174,8 +176,18 @@ func (ss *scaleSet) DetachDisk(diskName, diskURI string, nodeName types.NodeName
klog.V(2).Infof("azureDisk - update(%s): vm(%s) - detach disk(%s, %s)", nodeResourceGroup, nodeName, diskName, diskURI)
rerr := ss.VirtualMachineScaleSetVMsClient.Update(ctx, nodeResourceGroup, ssName, instanceID, newVM, "detach_disk")
if rerr != nil {
klog.Errorf("azureDisk - detach disk(%s, %s) on rg(%s) vm(%s) failed, err: %v", diskName, diskURI, nodeResourceGroup, nodeName, rerr)
if rerr.HTTPStatusCode == http.StatusNotFound {
klog.Errorf("azureDisk - begin to filterNonExistingDisks(%s, %s) on rg(%s) vm(%s)", diskName, diskURI, nodeResourceGroup, nodeName)
disks := ss.filterNonExistingDisks(ctx, *newVM.VirtualMachineScaleSetVMProperties.StorageProfile.DataDisks)
newVM.VirtualMachineScaleSetVMProperties.StorageProfile.DataDisks = &disks
if rerr = ss.VirtualMachineScaleSetVMsClient.Update(ctx, nodeResourceGroup, ssName, instanceID, newVM, "detach_disk"); rerr != nil {
return rerr.Error()
}
}
}
klog.V(2).Infof("azureDisk - update(%s): vm(%s) - detach disk(%s, %s) succeeded", nodeResourceGroup, nodeName, diskName, diskURI)
return nil
}