diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_backoff_test.go b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_backoff_test.go
index 707bdbdcaec..d4565f90ecc 100644
--- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_backoff_test.go
+++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_backoff_test.go
@@ -59,7 +59,7 @@ func TestGetVirtualMachineWithRetry(t *testing.T) {
 		},
 		{
 			vmClientErr: &retry.Error{HTTPStatusCode: http.StatusInternalServerError},
-			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: "),
+			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: %w", error(nil)),
 		},
 	}
 
@@ -229,7 +229,7 @@ func TestCreateOrUpdateSecurityGroupCanceled(t *testing.T) {
 	mockSGClient.EXPECT().Get(gomock.Any(), az.ResourceGroup, "sg", gomock.Any()).Return(network.SecurityGroup{}, nil)
 
 	err := az.CreateOrUpdateSecurityGroup(network.SecurityGroup{Name: to.StringPtr("sg")})
-	assert.Equal(t, fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: canceledandsupersededduetoanotheroperation"), err)
+	assert.EqualError(t, fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("canceledandsupersededduetoanotheroperation")), err.Error())
 
 	// security group should be removed from cache if the operation is canceled
 	shouldBeEmpty, err := az.nsgCache.Get("sg", cache.CacheReadTypeDefault)
@@ -249,15 +249,15 @@ func TestCreateOrUpdateLB(t *testing.T) {
 	}{
 		{
 			clientErr:   &retry.Error{HTTPStatusCode: http.StatusPreconditionFailed},
-			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 412, RawError: "),
+			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 412, RawError: %w", error(nil)),
 		},
 		{
-			clientErr:   &retry.Error{RawError: fmt.Errorf(operationCanceledErrorMessage)},
-			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: canceledandsupersededduetoanotheroperation"),
+			clientErr:   &retry.Error{RawError: fmt.Errorf("canceledandsupersededduetoanotheroperation")},
+			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("canceledandsupersededduetoanotheroperation")),
 		},
 		{
 			clientErr:   &retry.Error{RawError: fmt.Errorf(referencedResourceNotProvisionedRawErrorString)},
-			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %s", referencedResourceNotProvisionedRawErrorString),
+			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf(referencedResourceNotProvisionedRawErrorString)),
 		},
 	}
 
@@ -301,7 +301,7 @@ func TestListLB(t *testing.T) {
 	}{
 		{
 			clientErr:   &retry.Error{HTTPStatusCode: http.StatusInternalServerError},
-			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: "),
+			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: %w", error(nil)),
 		},
 		{
 			clientErr: &retry.Error{HTTPStatusCode: http.StatusNotFound},
@@ -330,7 +330,7 @@ func TestListPIP(t *testing.T) {
 	}{
 		{
 			clientErr:   &retry.Error{HTTPStatusCode: http.StatusInternalServerError},
-			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: "),
+			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: %w", error(nil)),
 		},
 		{
 			clientErr: &retry.Error{HTTPStatusCode: http.StatusNotFound},
@@ -357,7 +357,7 @@ func TestCreateOrUpdatePIP(t *testing.T) {
 	mockPIPClient.EXPECT().CreateOrUpdate(gomock.Any(), az.ResourceGroup, "nic", gomock.Any()).Return(&retry.Error{HTTPStatusCode: http.StatusInternalServerError})
 
 	err := az.CreateOrUpdatePIP(&v1.Service{}, az.ResourceGroup, network.PublicIPAddress{Name: to.StringPtr("nic")})
-	assert.Equal(t, fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: "), err)
+	assert.Equal(t, fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: %w", error(nil)), err)
 }
 
 func TestCreateOrUpdateInterface(t *testing.T) {
@@ -369,7 +369,7 @@ func TestCreateOrUpdateInterface(t *testing.T) {
 	mockInterfaceClient.EXPECT().CreateOrUpdate(gomock.Any(), az.ResourceGroup, "nic", gomock.Any()).Return(&retry.Error{HTTPStatusCode: http.StatusInternalServerError})
 
 	err := az.CreateOrUpdateInterface(&v1.Service{}, network.Interface{Name: to.StringPtr("nic")})
-	assert.Equal(t, fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: "), err)
+	assert.Equal(t, fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: %w", error(nil)), err)
 }
 
 func TestDeletePublicIP(t *testing.T) {
@@ -381,7 +381,7 @@ func TestDeletePublicIP(t *testing.T) {
 	mockPIPClient.EXPECT().Delete(gomock.Any(), az.ResourceGroup, "pip").Return(&retry.Error{HTTPStatusCode: http.StatusInternalServerError})
 
 	err := az.DeletePublicIP(&v1.Service{}, az.ResourceGroup, "pip")
-	assert.Equal(t, fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: "), err)
+	assert.Equal(t, fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: %w", error(nil)), err)
 }
 
 func TestDeleteLB(t *testing.T) {
@@ -393,7 +393,7 @@ func TestDeleteLB(t *testing.T) {
 	mockLBClient.EXPECT().Delete(gomock.Any(), az.ResourceGroup, "lb").Return(&retry.Error{HTTPStatusCode: http.StatusInternalServerError})
 
 	err := az.DeleteLB(&v1.Service{}, "lb")
-	assert.Equal(t, fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: "), err)
+	assert.Equal(t, fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: %w", error(nil)), err)
 }
 
 func TestCreateOrUpdateRouteTable(t *testing.T) {
@@ -406,11 +406,11 @@ func TestCreateOrUpdateRouteTable(t *testing.T) {
 	}{
 		{
 			clientErr:   &retry.Error{HTTPStatusCode: http.StatusPreconditionFailed},
-			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 412, RawError: "),
+			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 412, RawError: %w", error(nil)),
 		},
 		{
-			clientErr:   &retry.Error{RawError: fmt.Errorf(operationCanceledErrorMessage)},
-			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: canceledandsupersededduetoanotheroperation"),
+			clientErr:   &retry.Error{RawError: fmt.Errorf("canceledandsupersededduetoanotheroperation")},
+			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("canceledandsupersededduetoanotheroperation")),
 		},
 	}
 
@@ -445,11 +445,11 @@ func TestCreateOrUpdateRoute(t *testing.T) {
 	}{
 		{
 			clientErr:   &retry.Error{HTTPStatusCode: http.StatusPreconditionFailed},
-			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 412, RawError: "),
+			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 412, RawError: %w", error(nil)),
 		},
 		{
-			clientErr:   &retry.Error{RawError: fmt.Errorf(operationCanceledErrorMessage)},
-			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: canceledandsupersededduetoanotheroperation"),
+			clientErr:   &retry.Error{RawError: fmt.Errorf("canceledandsupersededduetoanotheroperation")},
+			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("canceledandsupersededduetoanotheroperation")),
 		},
 		{
 			clientErr: nil,
@@ -489,7 +489,7 @@ func TestDeleteRouteWithName(t *testing.T) {
 	}{
 		{
 			clientErr:   &retry.Error{HTTPStatusCode: http.StatusInternalServerError},
-			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: "),
+			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: %w", error(nil)),
 		},
 		{
 			clientErr: nil,
diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_blobDiskController_test.go b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_blobDiskController_test.go
index 53b41785d13..4dd4762cf8a 100644
--- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_blobDiskController_test.go
+++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_blobDiskController_test.go
@@ -86,8 +86,8 @@ func TestCreateVolume(t *testing.T) {
 	b.common.cloud.StorageAccountClient = mockSAClient
 
 	diskName, diskURI, requestGB, err := b.CreateVolume("testBlob", "testsa", "type", b.common.location, 10)
-	expectedErr := fmt.Errorf("could not get storage key for storage account testsa: could not get storage key for " +
-		"storage account testsa: Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: ")
+	expectedErr := fmt.Errorf("could not get storage key for storage account testsa: could not get storage key for "+
+		"storage account testsa: Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: %w", error(nil))
 	assert.Equal(t, expectedErr, err)
 	assert.Empty(t, diskName)
 	assert.Empty(t, diskURI)
@@ -123,7 +123,7 @@ func TestDeleteVolume(t *testing.T) {
 	fakeDiskURL := "fake"
 	diskURL := "https://foo.blob./vhds/bar.vhd"
 	err := b.DeleteVolume(diskURL)
-	expectedErr := fmt.Errorf("no key for storage account foo, err Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: ")
+	expectedErr := fmt.Errorf("no key for storage account foo, err Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: %w", error(nil))
 	assert.Equal(t, expectedErr, err)
 
 	err = b.DeleteVolume(diskURL)
diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_vmss_test.go b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_vmss_test.go
index ddd95b4bc92..5ca015df9b5 100644
--- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_vmss_test.go
+++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_vmss_test.go
@@ -89,7 +89,7 @@ func TestAttachDiskWithVMSS(t *testing.T) {
 			isManagedDisk:  false,
 			existedDisk:    compute.Disk{Name: to.StringPtr("disk-name")},
 			expectedErr:    true,
-			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 404, RawError: instance not found"),
+			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 404, RawError: %w", cloudprovider.InstanceNotFound),
 		},
 	}
 
@@ -177,7 +177,7 @@ func TestDetachDiskWithVMSS(t *testing.T) {
 			vmssvmName:     "vmss00-vm-000000",
 			existedDisk:    compute.Disk{Name: to.StringPtr(diskName)},
 			expectedErr:    true,
-			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 404, RawError: instance not found"),
+			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 404, RawError: %w", cloudprovider.InstanceNotFound),
 		},
 		{
 			desc: "no error shall be returned if everything is good and the attaching disk does not match data disk",
diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_loadbalancer.go b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_loadbalancer.go
index 749b4cc463f..972ed686d11 100644
--- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_loadbalancer.go
+++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_loadbalancer.go
@@ -21,6 +21,7 @@ package azure
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"math"
 	"reflect"
@@ -305,7 +306,7 @@ func (az *Cloud) cleanBackendpoolForPrimarySLB(primarySLB *network.LoadBalancer,
 			ipConf := (*bp.BackendIPConfigurations)[i]
 			ipConfigID := to.String(ipConf.ID)
 			_, vmSetName, err := az.VMSet.GetNodeNameByIPConfigurationID(ipConfigID)
-			if err != nil {
+			if err != nil && !errors.Is(err, cloudprovider.InstanceNotFound) {
 				return nil, err
 			}
 			primaryVMSetName := az.VMSet.GetPrimaryVMSetName()
@@ -1130,13 +1131,10 @@ func (az *Cloud) reconcileLoadBalancer(clusterName string, service *v1.Service,
 			for _, ipConf := range *bp.BackendIPConfigurations {
 				ipConfID := to.String(ipConf.ID)
 				nodeName, _, err := az.VMSet.GetNodeNameByIPConfigurationID(ipConfID)
-				if err != nil {
+				if err != nil && !errors.Is(err, cloudprovider.InstanceNotFound) {
 					return nil, err
 				}
-				if nodeName == "" {
-					// VM may under deletion
-					continue
-				}
+
 				// If a node is not supposed to be included in the LB, it
 				// would not be in the `nodes` slice. We need to check the nodes that
 				// have been added to the LB's backendpool, find the unwanted ones and
diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_loadbalancer_test.go b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_loadbalancer_test.go
index 2e67909ddf1..a1fbc33efcf 100644
--- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_loadbalancer_test.go
+++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_loadbalancer_test.go
@@ -31,6 +31,7 @@ import (
 	"testing"
 	"time"
 
+	"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-12-01/compute"
 	"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2019-06-01/network"
 	"github.com/Azure/go-autorest/autorest/to"
 	"github.com/golang/mock/gomock"
@@ -38,6 +39,7 @@ import (
 
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	cloudprovider "k8s.io/cloud-provider"
 	"k8s.io/legacy-cloud-providers/azure/clients/interfaceclient/mockinterfaceclient"
 	"k8s.io/legacy-cloud-providers/azure/clients/loadbalancerclient/mockloadbalancerclient"
 	"k8s.io/legacy-cloud-providers/azure/clients/publicipclient/mockpublicipclient"
@@ -3783,6 +3785,62 @@ func TestCleanBackendpoolForPrimarySLB(t *testing.T) {
 	assert.Equal(t, expectedLB, *cleanedLB)
 }
 
+func TestCleanBackendpoolForPrimarySLBForInstanceNotFound(t *testing.T) {
+	ctrl := gomock.NewController(t)
+	defer ctrl.Finish()
+	cloud := GetTestCloud(ctrl)
+	cloud.LoadBalancerSku = loadBalancerSkuStandard
+	cloud.EnableMultipleStandardLoadBalancers = true
+	cloud.PrimaryAvailabilitySetName = "agentpool1-availabilitySet-00000000"
+	clusterName := "testCluster"
+	service := getTestService("test", v1.ProtocolTCP, nil, false, 80)
+	lb := buildDefaultTestLB("testCluster", []string{
+		"/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Network/networkInterfaces/k8s-agentpool1-00000000-nic-1/ipConfigurations/ipconfig1",
+		"/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Network/networkInterfaces/k8s-agentpool2-00000000-nic-1/ipConfigurations/ipconfig1",
+	})
+	// existingVMForAS1 := buildDefaultTestVirtualMachine("/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Compute/availabilitySets/agentpool1-availabilitySet-00000000", []string{"/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Network/networkInterfaces/k8s-agentpool1-00000000-nic-1"})
+	existingVMForAS2 := buildDefaultTestVirtualMachine("/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Compute/availabilitySets/agentpool2-availabilitySet-00000000", []string{"/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Network/networkInterfaces/k8s-agentpool2-00000000-nic-1"})
+	existingNICForAS1 := buildDefaultTestInterface(true, []string{"/subscriptions/sub/resourceGroups/gh/providers/Microsoft.Network/loadBalancers/testCluster/backendAddressPools/testCluster"})
+	existingNICForAS1.VirtualMachine = &network.SubResource{
+		ID: to.StringPtr("/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Compute/virtualMachines/k8s-agentpool1-00000000-1"),
+	}
+	existingNICForAS2 := buildDefaultTestInterface(true, []string{"/subscriptions/sub/resourceGroups/gh/providers/Microsoft.Network/loadBalancers/testCluster/backendAddressPools/testCluster"})
+	existingNICForAS2.VirtualMachine = &network.SubResource{
+		ID: to.StringPtr("/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Compute/virtualMachines/k8s-agentpool2-00000000-1"),
+	}
+	mockVMClient := mockvmclient.NewMockInterface(ctrl)
+	mockVMClient.EXPECT().Get(gomock.Any(), cloud.ResourceGroup, "k8s-agentpool1-00000000-1", gomock.Any()).Return(compute.VirtualMachine{}, &retry.Error{RawError: cloudprovider.InstanceNotFound})
+	mockVMClient.EXPECT().Get(gomock.Any(), cloud.ResourceGroup, "k8s-agentpool2-00000000-1", gomock.Any()).Return(existingVMForAS2, nil)
+	cloud.VirtualMachinesClient = mockVMClient
+	mockNICClient := mockinterfaceclient.NewMockInterface(ctrl)
+	mockNICClient.EXPECT().Get(gomock.Any(), "rg", "k8s-agentpool1-00000000-nic-1", gomock.Any()).Return(existingNICForAS1, nil)
+	mockNICClient.EXPECT().Get(gomock.Any(), "rg", "k8s-agentpool2-00000000-nic-1", gomock.Any()).Return(existingNICForAS2, nil).Times(3)
+	mockNICClient.EXPECT().CreateOrUpdate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil)
+	cloud.InterfacesClient = mockNICClient
+
+	expectedLB := network.LoadBalancer{
+		Name: to.StringPtr("testCluster"),
+		LoadBalancerPropertiesFormat: &network.LoadBalancerPropertiesFormat{
+			BackendAddressPools: &[]network.BackendAddressPool{
+				{
+					Name: to.StringPtr("testCluster"),
+					BackendAddressPoolPropertiesFormat: &network.BackendAddressPoolPropertiesFormat{
+						BackendIPConfigurations: &[]network.InterfaceIPConfiguration{
+							{
+								ID: to.StringPtr("/subscriptions/subscription/resourceGroups/rg/providers/Microsoft.Network/networkInterfaces/k8s-agentpool1-00000000-nic-1/ipConfigurations/ipconfig1"),
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	cleanedLB, err := cloud.cleanBackendpoolForPrimarySLB(&lb, &service, clusterName)
+	assert.NoError(t, err)
+	assert.Equal(t, expectedLB, *cleanedLB)
+}
+
 func buildDefaultTestLB(name string, backendIPConfigs []string) network.LoadBalancer {
 	expectedLB := network.LoadBalancer{
 		Name: to.StringPtr(name),
diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_managedDiskController_test.go b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_managedDiskController_test.go
index 80bedfda97b..fad06ef16bc 100644
--- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_managedDiskController_test.go
+++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_managedDiskController_test.go
@@ -210,7 +210,7 @@ func TestDeleteManagedDisk(t *testing.T) {
 			diskName:       fakeGetDiskFailed,
 			existedDisk:    compute.Disk{Name: to.StringPtr(fakeGetDiskFailed)},
 			expectedErr:    true,
-			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: Get Disk failed"),
+			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("Get Disk failed")),
 		},
 	}
 
@@ -264,7 +264,7 @@ func TestGetDisk(t *testing.T) {
 			diskName:                  fakeGetDiskFailed,
 			existedDisk:               compute.Disk{Name: to.StringPtr(fakeGetDiskFailed)},
 			expectedErr:               true,
-			expectedErrMsg:            fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: Get Disk failed"),
+			expectedErrMsg:            fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("Get Disk failed")),
 			expectedProvisioningState: "",
 			expectedDiskID:            "",
 		},
@@ -343,7 +343,7 @@ func TestResizeDisk(t *testing.T) {
 			existedDisk:      compute.Disk{Name: to.StringPtr(fakeGetDiskFailed), DiskProperties: &compute.DiskProperties{DiskSizeGB: &diskSizeGB, DiskState: compute.Unattached}},
 			expectedQuantity: *resource.NewQuantity(2*(1024*1024*1024), resource.BinarySI),
 			expectedErr:      true,
-			expectedErrMsg:   fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: Get Disk failed"),
+			expectedErrMsg:   fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("Get Disk failed")),
 		},
 		{
 			desc:             "an error shall be returned if everything is good but create disk failed",
@@ -353,7 +353,7 @@ func TestResizeDisk(t *testing.T) {
 			existedDisk:      compute.Disk{Name: to.StringPtr(fakeCreateDiskFailed), DiskProperties: &compute.DiskProperties{DiskSizeGB: &diskSizeGB, DiskState: compute.Unattached}},
 			expectedQuantity: *resource.NewQuantity(2*(1024*1024*1024), resource.BinarySI),
 			expectedErr:      true,
-			expectedErrMsg:   fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: Create Disk failed"),
+			expectedErrMsg:   fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("Create Disk failed")),
 		},
 		{
 			desc: "an error shall be returned if disk is not in Unattached state",
@@ -485,7 +485,7 @@ func TestGetLabelsForVolume(t *testing.T) {
 			},
 			existedDisk:    compute.Disk{Name: to.StringPtr(fakeGetDiskFailed), DiskProperties: &compute.DiskProperties{DiskSizeGB: &diskSizeGB}, Zones: &[]string{"1"}},
 			expectedErr:    true,
-			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: Get Disk failed"),
+			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("Get Disk failed")),
 		},
 		{
 			desc: "an error shall be returned if everything is good with invalid DiskURI",
diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_routes_test.go b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_routes_test.go
index 27440fce47c..a85379410e5 100644
--- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_routes_test.go
+++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_routes_test.go
@@ -179,7 +179,7 @@ func TestCreateRoute(t *testing.T) {
 				HTTPStatusCode: http.StatusInternalServerError,
 				RawError:       fmt.Errorf("CreateOrUpdate error"),
 			},
-			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: CreateOrUpdate error"),
+			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: %w", fmt.Errorf("CreateOrUpdate error")),
 		},
 		{
 			name: "CreateRoute should do nothing if route already exists",
@@ -198,7 +198,7 @@ func TestCreateRoute(t *testing.T) {
 				HTTPStatusCode: http.StatusInternalServerError,
 				RawError:       fmt.Errorf("CreateOrUpdate error"),
 			},
-			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: CreateOrUpdate error"),
+			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: %w", fmt.Errorf("CreateOrUpdate error")),
 		},
 		{
 			name: "CreateRoute should report error if error occurs when invoke getRouteTable for the second time",
@@ -211,7 +211,7 @@ func TestCreateRoute(t *testing.T) {
 				HTTPStatusCode: http.StatusInternalServerError,
 				RawError:       fmt.Errorf("Get error"),
 			},
-			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: Get error"),
+			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: %w", fmt.Errorf("Get error")),
 		},
 		{
 			name: "CreateRoute should report error if error occurs when invoke routeTableClient.Get",
@@ -220,7 +220,7 @@ func TestCreateRoute(t *testing.T) {
 				HTTPStatusCode: http.StatusInternalServerError,
 				RawError:       fmt.Errorf("Get error"),
 			},
-			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: Get error"),
+			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: %w", fmt.Errorf("Get error")),
 		},
 		{
 			name: "CreateRoute should report error if error occurs when invoke GetIPByNodeName",
@@ -629,7 +629,7 @@ func TestListRoutes(t *testing.T) {
 				HTTPStatusCode: http.StatusInternalServerError,
 				RawError:       fmt.Errorf("Get error"),
 			},
-			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: Get error"),
+			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: %w", fmt.Errorf("Get error")),
 		},
 		{
 			name: "ListRoutes should report error if node informer is not synced",
diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_standard.go b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_standard.go
index 985975bd25f..187c2d82284 100644
--- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_standard.go
+++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_standard.go
@@ -459,6 +459,7 @@ func (as *availabilitySet) GetInstanceIDByNodeName(name string) (string, error)
 
 	machine, err = as.getVirtualMachine(types.NodeName(name), azcache.CacheReadTypeUnsafe)
 	if err == cloudprovider.InstanceNotFound {
+		klog.Warningf("Unable to find node %s: %v", name, cloudprovider.InstanceNotFound)
 		return "", cloudprovider.InstanceNotFound
 	}
 	if err != nil {
@@ -966,13 +967,16 @@ func (as *availabilitySet) EnsureBackendPoolDeleted(service *v1.Service, backend
 		}
 	}
 	nicUpdaters := make([]func() error, 0)
-	errors := make([]error, 0)
+	allErrs := make([]error, 0)
 	for i := range ipConfigurationIDs {
 		ipConfigurationID := ipConfigurationIDs[i]
 		nodeName, _, err := as.GetNodeNameByIPConfigurationID(ipConfigurationID)
-		if err != nil {
+		if err != nil && !errors.Is(err, cloudprovider.InstanceNotFound) {
 			klog.Errorf("Failed to GetNodeNameByIPConfigurationID(%s): %v", ipConfigurationID, err)
-			errors = append(errors, err)
+			allErrs = append(allErrs, err)
+			continue
+		}
+		if nodeName == "" {
 			continue
 		}
 
@@ -1039,9 +1043,9 @@
 	if errs != nil {
 		return utilerrors.Flatten(errs)
 	}
-	// Fail if there are other errors.
-	if len(errors) > 0 {
-		return utilerrors.Flatten(utilerrors.NewAggregate(errors))
+	// Fail if there are other errors.
+	if len(allErrs) > 0 {
+		return utilerrors.Flatten(utilerrors.NewAggregate(allErrs))
 	}
 
 	isOperationSucceeded = true
@@ -1105,7 +1109,8 @@ func (as *availabilitySet) GetNodeNameByIPConfigurationID(ipConfigurationID stri
 
 	vm, err := as.getVirtualMachine(types.NodeName(vmName), azcache.CacheReadTypeDefault)
 	if err != nil {
-		return "", "", fmt.Errorf("cannot get the virtual machine by node name %s", vmName)
+		klog.Errorf("Unable to get the virtual machine by node name %s: %v", vmName, err)
+		return "", "", err
 	}
 	asID := ""
 	if vm.VirtualMachineProperties != nil && vm.AvailabilitySet != nil {
@@ -1117,7 +1122,7 @@ func (as *availabilitySet) GetNodeNameByIPConfigurationID(ipConfigurationID stri
 	asName, err := getAvailabilitySetNameByID(asID)
 	if err != nil {
-		return "", "", fmt.Errorf("cannot get the availability set name by the availability set ID %s", asID)
+		return "", "", fmt.Errorf("cannot get the availability set name by the availability set ID %s: %v", asID, err)
 	}
 	return vmName, strings.ToLower(asName), nil
 }
diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_standard_test.go b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_standard_test.go
index 7cc07609db8..bb2178beeff 100644
--- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_standard_test.go
+++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_standard_test.go
@@ -878,7 +878,7 @@ func TestGetStandardInstanceIDByNodeName(t *testing.T) {
 		{
 			name:           "GetInstanceIDByNodeName should report error if Error encountered when invoke mockVMClient.Get",
 			nodeName:       "vm3",
-			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: VMGet error"),
+			expectedErrMsg: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 500, RawError: %w", fmt.Errorf("VMGet error")),
 		},
 		{
 			name: "GetInstanceIDByNodeName should report error if ResourceID is invalid",
diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_vmss.go b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_vmss.go
index 4b53632d078..92611567d97 100644
--- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_vmss.go
+++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_vmss.go
@@ -196,6 +196,7 @@ func (ss *scaleSet) getVmssVMByNodeIdentity(node *nodeIdentity, crt azcache.Azur
 	}
 
 	if !found || vm == nil {
+		klog.Warningf("Unable to find node %s: %v", node.nodeName, cloudprovider.InstanceNotFound)
 		return "", "", nil, cloudprovider.InstanceNotFound
 	}
 	return vmssName, instanceID, vm, nil
@@ -343,6 +344,7 @@ func (ss *scaleSet) GetInstanceIDByNodeName(name string) (string, error) {
 
 	_, _, vm, err := ss.getVmssVM(name, azcache.CacheReadTypeUnsafe)
 	if err != nil {
+		klog.Errorf("Unable to find node %s: %v", name, err)
 		return "", err
 	}
 
@@ -390,6 +392,7 @@ func (ss *scaleSet) GetNodeNameByProviderID(providerID string) (types.NodeName,
 
 	vm, err := ss.getVmssVMByInstanceID(resourceGroup, scaleSetName, instanceID, azcache.CacheReadTypeUnsafe)
 	if err != nil {
+		klog.Errorf("Unable to find node by providerID %s: %v", providerID, err)
 		return "", err
 	}
 
@@ -710,6 +713,7 @@ func (ss *scaleSet) getNodeIdentityByNodeName(nodeName string, crt azcache.Azure
 		return nil, err
 	}
 	if node.vmssName == "" {
+		klog.Warningf("Unable to find node %s: %v", nodeName, cloudprovider.InstanceNotFound)
 		return nil, cloudprovider.InstanceNotFound
 	}
 	return node, nil
@@ -722,7 +726,7 @@ func (ss *scaleSet) listScaleSetVMs(scaleSetName, resourceGroup string) ([]compu
 
 	allVMs, rerr := ss.VirtualMachineScaleSetVMsClient.List(ctx, resourceGroup, scaleSetName, string(compute.InstanceView))
 	if rerr != nil {
-		klog.Errorf("VirtualMachineScaleSetVMsClient.List failed: %v", rerr)
+		klog.Errorf("VirtualMachineScaleSetVMsClient.List(%s, %s) failed: %v", resourceGroup, scaleSetName, rerr)
 		if rerr.IsNotFound() {
 			return nil, cloudprovider.InstanceNotFound
 		}
@@ -961,6 +965,11 @@ func (ss *scaleSet) EnsureHostInPool(service *v1.Service, nodeName types.NodeNam
 	vmName := mapNodeNameToVMName(nodeName)
 	ssName, instanceID, vm, err := ss.getVmssVM(vmName, azcache.CacheReadTypeDefault)
 	if err != nil {
+		if errors.Is(err, cloudprovider.InstanceNotFound) {
+			klog.Infof("EnsureHostInPool: skipping node %s because it is not found", vmName)
+			return "", "", "", nil, nil
+		}
+
 		return "", "", "", nil, err
 	}
 
@@ -1336,6 +1345,11 @@ func (ss *scaleSet) EnsureHostsInPool(service *v1.Service, nodes []*v1.Node, bac
 
 func (ss *scaleSet) ensureBackendPoolDeletedFromNode(nodeName, backendPoolID string) (string, string, string, *compute.VirtualMachineScaleSetVM, error) {
 	ssName, instanceID, vm, err := ss.getVmssVM(nodeName, azcache.CacheReadTypeDefault)
 	if err != nil {
+		if errors.Is(err, cloudprovider.InstanceNotFound) {
+			klog.Infof("ensureBackendPoolDeletedFromNode: skipping node %s because it is not found", nodeName)
+			return "", "", "", nil, nil
+		}
+
 		return "", "", "", nil, err
 	}
 
@@ -1408,7 +1422,7 @@ func (ss *scaleSet) GetNodeNameByIPConfigurationID(ipConfigurationID string) (st
 		klog.V(4).Infof("Can not extract scale set name from ipConfigurationID (%s), assuming it is managed by availability set", ipConfigurationID)
 		name, rg, err := ss.availabilitySet.GetNodeNameByIPConfigurationID(ipConfigurationID)
-		if err != nil {
+		if err != nil && !errors.Is(err, cloudprovider.InstanceNotFound) {
 			klog.Errorf("GetNodeNameByIPConfigurationID: failed to invoke availabilitySet.GetNodeNameByIPConfigurationID: %s", err.Error())
 			return "", "", err
 		}
@@ -1420,6 +1434,7 @@ func (ss *scaleSet) GetNodeNameByIPConfigurationID(ipConfigurationID string) (st
 	instanceID := matches[3]
 	vm, err := ss.getVmssVMByInstanceID(resourceGroup, scaleSetName, instanceID, azcache.CacheReadTypeUnsafe)
 	if err != nil {
+		klog.Errorf("Unable to find node by ipConfigurationID %s: %v", ipConfigurationID, err)
 		return "", "", err
 	}
 
diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_vmss_test.go b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_vmss_test.go
index 0402493a55c..a756cc52eea 100644
--- a/staging/src/k8s.io/legacy-cloud-providers/azure/azure_vmss_test.go
+++ b/staging/src/k8s.io/legacy-cloud-providers/azure/azure_vmss_test.go
@@ -708,7 +708,7 @@ func TestGetVMSS(t *testing.T) {
 			existedVMSSName: "vmss-1",
 			vmssName:        "vmss-1",
 			vmssListError:   &retry.Error{RawError: fmt.Errorf("error during vmss list")},
-			expectedErr:     fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: error during vmss list"),
+			expectedErr:     fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("error during vmss list")),
 		},
 	}
 
@@ -948,7 +948,7 @@ func TestGetInstanceTypeByNodeName(t *testing.T) {
 			vmList:       []string{"vmss-vm-000000"},
 			vmClientErr:  &retry.Error{RawError: fmt.Errorf("error")},
 			expectedType: "",
-			expectedErr:  fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: error"),
+			expectedErr:  fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("error")),
 		},
 	}
 
@@ -1070,7 +1070,7 @@ func TestGetPrimaryInterface(t *testing.T) {
 			vmList:              []string{"vmss-vm-000000"},
 			hasPrimaryInterface: true,
 			vmClientErr:         &retry.Error{RawError: fmt.Errorf("error")},
-			expectedErr:         fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: error"),
+			expectedErr:         fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("error")),
 		},
 		{
 			description: "GetPrimaryInterface should report the error if vmss client returns retry error",
@@ -1078,7 +1078,7 @@ func TestGetPrimaryInterface(t *testing.T) {
 			vmList:              []string{"vmss-vm-000000"},
 			hasPrimaryInterface: true,
 			vmssClientErr:       &retry.Error{RawError: fmt.Errorf("error")},
-			expectedErr:         fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: error"),
+			expectedErr:         fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("error")),
 		},
 		{
 			description: "GetPrimaryInterface should report the error if there is no primary interface",
@@ -1101,7 +1101,7 @@ func TestGetPrimaryInterface(t *testing.T) {
 			vmList:              []string{"vmss-vm-000000"},
 			hasPrimaryInterface: true,
 			nicClientErr:        &retry.Error{RawError: fmt.Errorf("error")},
-			expectedErr:         fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: error"),
+			expectedErr:         fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("error")),
 		},
 		{
 			description: "GetPrimaryInterface should report the error if the NIC instance is not found",
@@ -1176,7 +1176,7 @@ func TestGetVMSSPublicIPAddress(t *testing.T) {
 			pipName:      "pip",
 			found:        false,
 			pipClientErr: &retry.Error{RawError: fmt.Errorf("error")},
-			expectedErr:  fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: error"),
+			expectedErr:  fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("error")),
 		},
 		{
 			description: "GetVMSSPublicIPAddress should not report errors if the pip cannot be found",
@@ -1232,7 +1232,7 @@ func TestGetPrivateIPsByNodeName(t *testing.T) {
 			vmList:             []string{"vmss-vm-000000"},
 			vmClientErr:        &retry.Error{RawError: fmt.Errorf("error")},
 			expectedPrivateIPs: []string{},
-			expectedErr:        fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: error"),
+			expectedErr:        fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("error")),
 		},
 	}
 
@@ -1344,7 +1344,7 @@ func TestListScaleSets(t *testing.T) {
 		{
 			description:   "listScaleSets should report the error if vmss client returns an retry.Error",
 			vmssClientErr: &retry.Error{RawError: fmt.Errorf("error")},
-			expectedErr:   fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: error"),
+			expectedErr:   fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("error")),
 		},
 	}
 
@@ -1381,7 +1381,7 @@ func TestListScaleSetVMs(t *testing.T) {
 		{
 			description:     "listScaleSetVMs should report the error that the vmss vm client hits",
 			vmssVMClientErr: &retry.Error{RawError: fmt.Errorf("error")},
-			expectedErr:     fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: error"),
+			expectedErr:     fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("error")),
 		},
 	}
 
@@ -2145,7 +2145,7 @@ func TestEnsureHostsInPool(t *testing.T) {
 			expectedVMSSVMPutTimes: 1,
 		},
 		{
-			description: "EnsureHostsInPool should gather report the error if something goes wrong in EnsureHostInPool",
+			description: "EnsureHostsInPool should skip not found nodes",
 			nodes: []*v1.Node{
 				{
 					ObjectMeta: metav1.ObjectMeta{
@@ -2159,7 +2159,7 @@ func TestEnsureHostsInPool(t *testing.T) {
 			backendpoolID:          testLBBackendpoolID1,
 			vmSetName:              testVMSSName,
 			expectedVMSSVMPutTimes: 0,
-			expectedErr:            true,
+			expectedErr:            false,
 		},
 	}
 
@@ -2205,9 +2205,8 @@ func TestEnsureBackendPoolDeletedFromNode(t *testing.T) {
 		expectedErr       error
 	}{
 		{
-			description: "ensureBackendPoolDeletedFromNode should report the error that occurs during getVmssVM",
+			description: "ensureBackendPoolDeletedFromNode should skip not found nodes",
 			nodeName:    "vmss-vm-000001",
-			expectedErr: cloudprovider.InstanceNotFound,
 		},
 		{
 			description: "ensureBackendPoolDeletedFromNode skip the node if the VM's NIC config is nil",
@@ -2336,7 +2335,7 @@ func TestEnsureBackendPoolDeletedFromVMSS(t *testing.T) {
 			backendPoolID:   testLBBackendpoolID0,
 			expectedPutVMSS: true,
 			vmssClientErr:   &retry.Error{RawError: fmt.Errorf("error")},
-			expectedErr:     utilerrors.NewAggregate([]error{fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: error")}),
+			expectedErr:     utilerrors.NewAggregate([]error{fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %w", fmt.Errorf("error"))}),
 		},
 	}
 
diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/clients/vmssvmclient/azure_vmssvmclient_test.go b/staging/src/k8s.io/legacy-cloud-providers/azure/clients/vmssvmclient/azure_vmssvmclient_test.go
index 28b40bad57f..0b76ab0d101 100644
--- a/staging/src/k8s.io/legacy-cloud-providers/azure/clients/vmssvmclient/azure_vmssvmclient_test.go
+++ b/staging/src/k8s.io/legacy-cloud-providers/azure/clients/vmssvmclient/azure_vmssvmclient_test.go
@@ -707,7 +707,7 @@ func TestUpdateVMsThrottle(t *testing.T) {
 	vmssvmClient := getTestVMSSVMClient(armClient)
 	rerr := vmssvmClient.UpdateVMs(context.TODO(), "rg", "vmss1", instances, "test")
 	assert.NotNil(t, rerr)
-	assert.Equal(t, throttleErr.Error(), fmt.Errorf(rerr.RawError.Error()))
+	assert.EqualError(t, throttleErr.Error(), rerr.RawError.Error())
 }
 
 func getTestVMSSVM(vmssName, instanceID string) compute.VirtualMachineScaleSetVM {
diff --git a/staging/src/k8s.io/legacy-cloud-providers/azure/retry/azure_error.go b/staging/src/k8s.io/legacy-cloud-providers/azure/retry/azure_error.go
index bbdd2fda642..00c069dfaf7 100644
--- a/staging/src/k8s.io/legacy-cloud-providers/azure/retry/azure_error.go
+++ b/staging/src/k8s.io/legacy-cloud-providers/azure/retry/azure_error.go
@@ -77,7 +77,7 @@ func (err *Error) Error() error {
 		retryAfterSeconds = int(err.RetryAfter.Sub(curTime) / time.Second)
 	}
 
-	return fmt.Errorf("Retriable: %v, RetryAfter: %ds, HTTPStatusCode: %d, RawError: %v",
+	return fmt.Errorf("Retriable: %v, RetryAfter: %ds, HTTPStatusCode: %d, RawError: %w",
 		err.Retriable, retryAfterSeconds, err.HTTPStatusCode, err.RawError)
 }