diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_common.go b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_common.go index b900ded61a8..ec61cf0ca2b 100644 --- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_common.go +++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_common.go @@ -21,6 +21,7 @@ package azure import ( "context" "fmt" + "net/http" "path" "regexp" "strings" @@ -345,6 +346,48 @@ func filterDetachingDisks(unfilteredDisks []compute.DataDisk) []compute.DataDisk return filteredDisks } +func (c *controllerCommon) filterNonExistingDisks(ctx context.Context, unfilteredDisks []compute.DataDisk) []compute.DataDisk { + filteredDisks := []compute.DataDisk{} + for _, disk := range unfilteredDisks { + filter := false + if disk.ManagedDisk != nil && disk.ManagedDisk.ID != nil { + diskURI := *disk.ManagedDisk.ID + exist, err := c.cloud.checkDiskExists(ctx, diskURI) + if err != nil { + klog.Errorf("checkDiskExists(%s) failed with error: %v", diskURI, err) + } else { + // only filter disk when checkDiskExists returns + filter = !exist + if filter { + klog.Errorf("disk(%s) does not exist, removed from data disk list", diskURI) + } + } + } + + if !filter { + filteredDisks = append(filteredDisks, disk) + } + } + return filteredDisks +} + +func (c *controllerCommon) checkDiskExists(ctx context.Context, diskURI string) (bool, error) { + diskName := path.Base(diskURI) + resourceGroup, err := getResourceGroupFromDiskURI(diskURI) + if err != nil { + return false, err + } + + if _, rerr := c.cloud.DisksClient.Get(ctx, resourceGroup, diskName); rerr != nil { + if rerr.HTTPStatusCode == http.StatusNotFound { + return false, nil + } + return false, rerr.Error() + } + + return true, nil +} + func getValidCreationData(subscriptionID, resourceGroup, sourceResourceID, sourceType string) (compute.CreationData, error) { if sourceResourceID == "" { return compute.CreationData{ diff --git 
a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_common_test.go b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_common_test.go index bbd872ac319..4a50e5f5376 100644 --- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_common_test.go +++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_common_test.go @@ -427,3 +427,119 @@ func TestGetValidCreationData(t *testing.T) { } } } + +func TestCheckDiskExists(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + ctx, cancel := getContextWithCancel() + defer cancel() + + testCloud := GetTestCloud(ctrl) + common := &controllerCommon{ + location: testCloud.Location, + storageEndpointSuffix: testCloud.Environment.StorageEndpointSuffix, + resourceGroup: testCloud.ResourceGroup, + subscriptionID: testCloud.SubscriptionID, + cloud: testCloud, + vmLockMap: newLockMap(), + } + // create a new disk before running test + newDiskName := "newdisk" + newDiskURI := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Compute/disks/%s", + testCloud.SubscriptionID, testCloud.ResourceGroup, newDiskName) + fDC := newFakeDisksClient() + rerr := fDC.CreateOrUpdate(ctx, testCloud.ResourceGroup, newDiskName, compute.Disk{}) + assert.Equal(t, rerr == nil, true, "return error: %v", rerr) + testCloud.DisksClient = fDC + + testCases := []struct { + diskURI string + expectedResult bool + expectedErr bool + }{ + { + diskURI: "incorrect disk URI format", + expectedResult: false, + expectedErr: true, + }, + { + diskURI: "/subscriptions/xxx/resourceGroups/xxx/providers/Microsoft.Compute/disks/non-existing-disk", + expectedResult: false, + expectedErr: false, + }, + { + diskURI: newDiskURI, + expectedResult: true, + expectedErr: false, + }, + } + + for i, test := range testCases { + exist, err := common.checkDiskExists(ctx, test.diskURI) + assert.Equal(t, test.expectedResult, exist, "TestCase[%d]", i, exist) + assert.Equal(t, test.expectedErr, err 
!= nil, "TestCase[%d], return error: %v", i, err) + } +} + +func TestFilterNonExistingDisks(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + ctx, cancel := getContextWithCancel() + defer cancel() + + testCloud := GetTestCloud(ctrl) + common := &controllerCommon{ + location: testCloud.Location, + storageEndpointSuffix: testCloud.Environment.StorageEndpointSuffix, + resourceGroup: testCloud.ResourceGroup, + subscriptionID: testCloud.SubscriptionID, + cloud: testCloud, + vmLockMap: newLockMap(), + } + // create a new disk before running test + diskURIPrefix := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Compute/disks/", + testCloud.SubscriptionID, testCloud.ResourceGroup) + newDiskName := "newdisk" + newDiskURI := diskURIPrefix + newDiskName + fDC := newFakeDisksClient() + rerr := fDC.CreateOrUpdate(ctx, testCloud.ResourceGroup, newDiskName, compute.Disk{}) + assert.Equal(t, rerr == nil, true, "return error: %v", rerr) + testCloud.DisksClient = fDC + + disks := []compute.DataDisk{ + { + Name: &newDiskName, + ManagedDisk: &compute.ManagedDiskParameters{ + ID: &newDiskURI, + }, + }, + { + Name: pointer.StringPtr("DiskName2"), + ManagedDisk: &compute.ManagedDiskParameters{ + ID: pointer.StringPtr(diskURIPrefix + "DiskName2"), + }, + }, + { + Name: pointer.StringPtr("DiskName3"), + ManagedDisk: &compute.ManagedDiskParameters{ + ID: pointer.StringPtr(diskURIPrefix + "DiskName3"), + }, + }, + { + Name: pointer.StringPtr("DiskName4"), + ManagedDisk: &compute.ManagedDiskParameters{ + ID: pointer.StringPtr(diskURIPrefix + "DiskName4"), + }, + }, + } + + filteredDisks := common.filterNonExistingDisks(ctx, disks) + assert.Equal(t, 1, len(filteredDisks)) + assert.Equal(t, newDiskName, *filteredDisks[0].Name) + + disks = []compute.DataDisk{} + filteredDisks = common.filterNonExistingDisks(ctx, disks) + assert.Equal(t, 0, len(filteredDisks)) +} diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_standard.go 
b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_standard.go index 024cd74936f..f65bead7263 100644 --- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_standard.go +++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_standard.go @@ -19,6 +19,7 @@ limitations under the License. package azure import ( + "net/http" "strings" "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute" @@ -98,18 +99,20 @@ func (as *availabilitySet) AttachDisk(isManagedDisk bool, diskName, diskURI stri rerr := as.VirtualMachinesClient.Update(ctx, nodeResourceGroup, vmName, newVM, "attach_disk") if rerr != nil { - klog.Errorf("azureDisk - attach disk(%s, %s) failed, err: %v", diskName, diskURI, rerr) - detail := rerr.Error().Error() - if strings.Contains(detail, errLeaseFailed) || strings.Contains(detail, errDiskBlobNotFound) { - // if lease cannot be acquired or disk not found, immediately detach the disk and return the original error - klog.V(2).Infof("azureDisk - err %v, try detach disk(%s, %s)", rerr, diskName, diskURI) - as.DetachDisk(diskName, diskURI, nodeName) + klog.Errorf("azureDisk - attach disk(%s, %s) on rg(%s) vm(%s) failed, err: %v", diskName, diskURI, nodeResourceGroup, vmName, rerr) + if rerr.HTTPStatusCode == http.StatusNotFound { + klog.Errorf("azureDisk - begin to filterNonExistingDisks(%s, %s) on rg(%s) vm(%s)", diskName, diskURI, nodeResourceGroup, vmName) + disks := as.filterNonExistingDisks(ctx, *newVM.VirtualMachineProperties.StorageProfile.DataDisks) + newVM.VirtualMachineProperties.StorageProfile.DataDisks = &disks + if rerr = as.VirtualMachinesClient.Update(ctx, nodeResourceGroup, vmName, newVM, "attach_disk"); rerr != nil { + return rerr.Error() + } - } + } else { + return rerr.Error() + } - - return rerr.Error() } - klog.V(2).Infof("azureDisk - attach disk(%s, %s) succeeded", diskName, diskURI) + klog.V(2).Infof("azureDisk - update(%s): vm(%s) - attach disk(%s, %s) succeeded", nodeResourceGroup, vmName, diskName, diskURI) 
return nil } @@ -166,9 +167,20 @@ func (as *availabilitySet) DetachDisk(diskName, diskURI string, nodeName types.N rerr := as.VirtualMachinesClient.Update(ctx, nodeResourceGroup, vmName, newVM, "detach_disk") if rerr != nil { - return rerr.Error() + klog.Errorf("azureDisk - detach disk(%s, %s) on rg(%s) vm(%s) failed, err: %v", diskName, diskURI, nodeResourceGroup, vmName, rerr) + if rerr.HTTPStatusCode == http.StatusNotFound { + klog.Errorf("azureDisk - begin to filterNonExistingDisks(%s, %s) on rg(%s) vm(%s)", diskName, diskURI, nodeResourceGroup, vmName) + disks := as.filterNonExistingDisks(ctx, *newVM.VirtualMachineProperties.StorageProfile.DataDisks) + newVM.VirtualMachineProperties.StorageProfile.DataDisks = &disks + if rerr = as.VirtualMachinesClient.Update(ctx, nodeResourceGroup, vmName, newVM, "detach_disk"); rerr != nil { + return rerr.Error() + } + } else { + return rerr.Error() + } } + klog.V(2).Infof("azureDisk - update(%s): vm(%s) - detach disk(%s, %s) succeeded", nodeResourceGroup, vmName, diskName, diskURI) return nil } diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_vmss.go b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_vmss.go index 30c3aef8845..6bbc45fa1a5 100644 --- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_vmss.go +++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_vmss.go @@ -19,6 +19,7 @@ limitations under the License. 
package azure import ( + "net/http" "strings" "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute" @@ -103,17 +104,20 @@ func (ss *scaleSet) AttachDisk(isManagedDisk bool, diskName, diskURI string, nod klog.V(2).Infof("azureDisk - update(%s): vm(%s) - attach disk(%s, %s) with DiskEncryptionSetID(%s)", nodeResourceGroup, nodeName, diskName, diskURI, diskEncryptionSetID) rerr := ss.VirtualMachineScaleSetVMsClient.Update(ctx, nodeResourceGroup, ssName, instanceID, newVM, "attach_disk") if rerr != nil { - detail := rerr.Error().Error() - if strings.Contains(detail, errLeaseFailed) || strings.Contains(detail, errDiskBlobNotFound) { - // if lease cannot be acquired or disk not found, immediately detach the disk and return the original error - klog.Infof("azureDisk - err %s, try detach disk(%s, %s)", detail, diskName, diskURI) - ss.DetachDisk(diskName, diskURI, nodeName) + klog.Errorf("azureDisk - attach disk(%s, %s) on rg(%s) vm(%s) failed, err: %v", diskName, diskURI, nodeResourceGroup, nodeName, rerr) + if rerr.HTTPStatusCode == http.StatusNotFound { + klog.Errorf("azureDisk - begin to filterNonExistingDisks(%s, %s) on rg(%s) vm(%s)", diskName, diskURI, nodeResourceGroup, nodeName) + disks := ss.filterNonExistingDisks(ctx, *newVM.VirtualMachineScaleSetVMProperties.StorageProfile.DataDisks) + newVM.VirtualMachineScaleSetVMProperties.StorageProfile.DataDisks = &disks + if rerr = ss.VirtualMachineScaleSetVMsClient.Update(ctx, nodeResourceGroup, ssName, instanceID, newVM, "attach_disk"); rerr != nil { + return rerr.Error() + } - } + } else { + return rerr.Error() + } - - return rerr.Error() } - klog.V(2).Infof("azureDisk - attach disk(%s, %s) succeeded", diskName, diskURI) + klog.V(2).Infof("azureDisk - update(%s): vm(%s) - attach disk(%s, %s) succeeded", nodeResourceGroup, nodeName, diskName, diskURI) return nil } @@ -174,9 +176,19 @@ func (ss *scaleSet) DetachDisk(diskName, diskURI string, nodeName types.NodeName klog.V(2).Infof("azureDisk - update(%s): vm(%s) - detach disk(%s, %s)", 
nodeResourceGroup, nodeName, diskName, diskURI) rerr := ss.VirtualMachineScaleSetVMsClient.Update(ctx, nodeResourceGroup, ssName, instanceID, newVM, "detach_disk") if rerr != nil { - return rerr.Error() + klog.Errorf("azureDisk - detach disk(%s, %s) on rg(%s) vm(%s) failed, err: %v", diskName, diskURI, nodeResourceGroup, nodeName, rerr) + if rerr.HTTPStatusCode == http.StatusNotFound { + klog.Errorf("azureDisk - begin to filterNonExistingDisks(%s, %s) on rg(%s) vm(%s)", diskName, diskURI, nodeResourceGroup, nodeName) + disks := ss.filterNonExistingDisks(ctx, *newVM.VirtualMachineScaleSetVMProperties.StorageProfile.DataDisks) + newVM.VirtualMachineScaleSetVMProperties.StorageProfile.DataDisks = &disks + if rerr = ss.VirtualMachineScaleSetVMsClient.Update(ctx, nodeResourceGroup, ssName, instanceID, newVM, "detach_disk"); rerr != nil { + return rerr.Error() + } + } else { + return rerr.Error() + } } + klog.V(2).Infof("azureDisk - update(%s): vm(%s) - detach disk(%s, %s) succeeded", nodeResourceGroup, nodeName, diskName, diskURI) + return nil }